Example 51 with Granularity

Use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.

Class HashJoinSegmentStorageAdapter, method makeCursors:

@Override
public Sequence<Cursor> makeCursors(
    @Nullable final Filter filter,
    @Nonnull final Interval interval,
    @Nonnull final VirtualColumns virtualColumns,
    @Nonnull final Granularity gran,
    final boolean descending,
    @Nullable final QueryMetrics<?> queryMetrics
) {
    final Filter combinedFilter = baseFilterAnd(filter);
    if (clauses.isEmpty()) {
        return baseAdapter.makeCursors(combinedFilter, interval, virtualColumns, gran, descending, queryMetrics);
    }
    // Filter pre-analysis key implied by the call to "makeCursors". We need to sanity-check that it matches
    // the actual pre-analysis that was done. Note: we can't infer a rewrite config from the "makeCursors" call
    // (it requires access to the query context), so we skip sanity-checking it by re-using the one present
    // in the cached key.
    final JoinFilterPreAnalysisKey keyIn = new JoinFilterPreAnalysisKey(
        joinFilterPreAnalysis.getKey().getRewriteConfig(),
        clauses,
        virtualColumns,
        combinedFilter
    );
    final JoinFilterPreAnalysisKey keyCached = joinFilterPreAnalysis.getKey();
    if (!keyIn.equals(keyCached)) {
        // It is a bug if this happens. The implied key and the cached key should always match.
        throw new ISE("Pre-analysis mismatch, cannot execute query");
    }
    final List<VirtualColumn> preJoinVirtualColumns = new ArrayList<>();
    final List<VirtualColumn> postJoinVirtualColumns = new ArrayList<>();
    determineBaseColumnsWithPreAndPostJoinVirtualColumns(virtualColumns, preJoinVirtualColumns, postJoinVirtualColumns);
    // Merge the base-table filter specified by the user with the base-table filter pushed down
    // from the join.
    JoinFilterSplit joinFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis, baseFilter);
    preJoinVirtualColumns.addAll(joinFilterSplit.getPushDownVirtualColumns());
    final Sequence<Cursor> baseCursorSequence = baseAdapter.makeCursors(
        joinFilterSplit.getBaseTableFilter().orElse(null),
        interval,
        VirtualColumns.create(preJoinVirtualColumns),
        gran,
        descending,
        queryMetrics
    );
    Closer joinablesCloser = Closer.create();
    return Sequences.<Cursor, Cursor>map(baseCursorSequence, cursor -> {
        assert cursor != null;
        Cursor retVal = cursor;
        for (JoinableClause clause : clauses) {
            retVal = HashJoinEngine.makeJoinCursor(retVal, clause, descending, joinablesCloser);
        }
        return PostJoinCursor.wrap(retVal, VirtualColumns.create(postJoinVirtualColumns), joinFilterSplit.getJoinTableFilter().orElse(null));
    }).withBaggage(joinablesCloser);
}
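A minimal invocation sketch for the method above. The adapter variable hashJoinAdapter is hypothetical; the helpers (Granularities, Intervals, VirtualColumns.EMPTY) are standard Druid utilities assumed to be imported:

// Hypothetical caller: scans the full time range in a single ALL-granularity bucket.
final Sequence<Cursor> cursors = hashJoinAdapter.makeCursors(
    null,                   // no filter
    Intervals.ETERNITY,     // entire time range
    VirtualColumns.EMPTY,   // no virtual columns
    Granularities.ALL,      // one bucket covering the interval
    false,                  // ascending time order
    null                    // no query metrics
);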

Example 52 with Granularity

Use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.

Class Metadata, method merge:

// Arbitrary key-value pairs in the container follow "last one wins" semantics when the same
// key exists in multiple input Metadata containers; for other fields (e.g. aggregators),
// appropriate merging is done.
@Nullable
public static Metadata merge(@Nullable List<Metadata> toBeMerged, @Nullable AggregatorFactory[] overrideMergedAggregators) {
    if (toBeMerged == null || toBeMerged.isEmpty()) {
        return null;
    }
    boolean foundSomeMetadata = false;
    Map<String, Object> mergedContainer = new HashMap<>();
    List<AggregatorFactory[]> aggregatorsToMerge = overrideMergedAggregators == null ? new ArrayList<>() : null;
    List<TimestampSpec> timestampSpecsToMerge = new ArrayList<>();
    List<Granularity> gransToMerge = new ArrayList<>();
    List<Boolean> rollupToMerge = new ArrayList<>();
    for (Metadata metadata : toBeMerged) {
        if (metadata != null) {
            foundSomeMetadata = true;
            if (aggregatorsToMerge != null) {
                aggregatorsToMerge.add(metadata.getAggregators());
            }
            if (timestampSpecsToMerge != null && metadata.getTimestampSpec() != null) {
                timestampSpecsToMerge.add(metadata.getTimestampSpec());
            }
            if (gransToMerge != null) {
                gransToMerge.add(metadata.getQueryGranularity());
            }
            if (rollupToMerge != null) {
                rollupToMerge.add(metadata.isRollup());
            }
            mergedContainer.putAll(metadata.container);
        } else {
            // If metadata (and hence aggregators, timestampSpec, queryGranularity, and rollup) is
            // unknown for some segment being merged, the final merged segment should not carry
            // those fields in its metadata.
            aggregatorsToMerge = null;
            timestampSpecsToMerge = null;
            gransToMerge = null;
            rollupToMerge = null;
        }
    }
    if (!foundSomeMetadata) {
        return null;
    }
    final AggregatorFactory[] mergedAggregators = aggregatorsToMerge == null ? overrideMergedAggregators : AggregatorFactory.mergeAggregators(aggregatorsToMerge);
    final TimestampSpec mergedTimestampSpec = timestampSpecsToMerge == null ? null : TimestampSpec.mergeTimestampSpec(timestampSpecsToMerge);
    final Granularity mergedGranularity = gransToMerge == null ? null : Granularity.mergeGranularities(gransToMerge);
    Boolean rollup = null;
    if (rollupToMerge != null && !rollupToMerge.isEmpty()) {
        // Rollup is only preserved when every input agrees on the same non-null value.
        rollup = rollupToMerge.get(0);
        for (Boolean r : rollupToMerge) {
            if (r == null || !r.equals(rollup)) {
                rollup = null;
                break;
            }
        }
    }
    return new Metadata(mergedContainer, mergedAggregators, mergedTimestampSpec, mergedGranularity, rollup);
}
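A small sketch of the "last one wins" container semantics noted above. The keys and values are illustrative; the behavior follows directly from merge calling putAll on each container in list order:

Map<String, Object> older = new HashMap<>();
older.put("created", "2021-01-01");
Map<String, Object> newer = new HashMap<>();
newer.put("created", "2021-06-01");

Map<String, Object> mergedContainer = new HashMap<>();
mergedContainer.putAll(older);  // first container
mergedContainer.putAll(newer);  // same key: later entry overwrites the earlier one
// mergedContainer.get("created") -> "2021-06-01"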

Example 53 with Granularity

Use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.

Class ResultGranularTimestampComparatorTest, method testCompareHour:

@Test
public void testCompareHour() {
    Result<Object> res = new Result<Object>(time, null);
    Result<Object> same = new Result<Object>(time.plusMinutes(55), null);
    Result<Object> greater = new Result<Object>(time.plusHours(1), null);
    Result<Object> less = new Result<Object>(time.minusHours(1), null);
    Granularity hour = Granularities.HOUR;
    final Comparator<Result<Object>> comparator = ResultGranularTimestampComparator.create(hour, descending);
    Assert.assertEquals(0, comparator.compare(res, same));
    Assert.assertEquals(descending ? 1 : -1, comparator.compare(res, greater));
    Assert.assertEquals(descending ? -1 : 1, comparator.compare(res, less));
}
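The equality of res and same follows from granularity bucketing: the comparator truncates both timestamps to the start of their HOUR bucket before comparing. A sketch with an illustrative timestamp, assuming Druid's DateTimes utility:

DateTime t = DateTimes.of("2021-01-01T10:00:00Z");
Granularities.HOUR.bucketStart(t);                  // 2021-01-01T10:00:00Z
Granularities.HOUR.bucketStart(t.plusMinutes(55));  // 2021-01-01T10:00:00Z, same bucket
Granularities.HOUR.bucketStart(t.plusHours(1));     // 2021-01-01T11:00:00Z, next bucket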

Example 54 with Granularity

Use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.

Class NewestSegmentFirstIterator, method findInitialSearchInterval:

/**
 * Returns the initial search intervals, carved from
 * {@code (timeline.first().start, timeline.last().end - skipOffset)} after removing the skip intervals.
 *
 * @param timeline                     timeline of a dataSource
 * @param skipOffset                   period to skip back from the end of the latest segment
 * @param configuredSegmentGranularity segment granularity configured for compaction
 * @param skipIntervals                intervals to skip
 *
 * @return intervals to search; empty if no eligible segments are found
 */
private static List<Interval> findInitialSearchInterval(
    VersionedIntervalTimeline<String, DataSegment> timeline,
    Period skipOffset,
    Granularity configuredSegmentGranularity,
    @Nullable List<Interval> skipIntervals
) {
    Preconditions.checkArgument(timeline != null && !timeline.isEmpty(), "timeline should not be null or empty");
    Preconditions.checkNotNull(skipOffset, "skipOffset");
    final TimelineObjectHolder<String, DataSegment> first = Preconditions.checkNotNull(timeline.first(), "first");
    final TimelineObjectHolder<String, DataSegment> last = Preconditions.checkNotNull(timeline.last(), "last");
    final List<Interval> fullSkipIntervals = sortAndAddSkipIntervalFromLatest(last.getInterval().getEnd(), skipOffset, configuredSegmentGranularity, skipIntervals);
    final Interval totalInterval = new Interval(first.getInterval().getStart(), last.getInterval().getEnd());
    final List<Interval> filteredInterval = filterSkipIntervals(totalInterval, fullSkipIntervals);
    final List<Interval> searchIntervals = new ArrayList<>();
    for (Interval lookupInterval : filteredInterval) {
        final List<DataSegment> segments = timeline
            .findNonOvershadowedObjectsInInterval(lookupInterval, Partitions.ONLY_COMPLETE)
            .stream()
            .filter(segment -> lookupInterval.contains(segment.getInterval()))
            .collect(Collectors.toList());
        if (segments.isEmpty()) {
            continue;
        }
        DateTime searchStart = segments.stream()
            .map(segment -> segment.getId().getIntervalStart())
            .min(Comparator.naturalOrder())
            .orElseThrow(AssertionError::new);
        DateTime searchEnd = segments.stream()
            .map(segment -> segment.getId().getIntervalEnd())
            .max(Comparator.naturalOrder())
            .orElseThrow(AssertionError::new);
        searchIntervals.add(new Interval(searchStart, searchEnd));
    }
    return searchIntervals;
}
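A sketch of the interval arithmetic described in the Javadoc, with illustrative timestamps (the real values come from the timeline's first and last holders):

DateTime firstStart = DateTimes.of("2021-01-01");
DateTime lastEnd = DateTimes.of("2021-01-31");
Period skipOffset = new Period("P1D");
// (timeline.first().start, timeline.last().end - skipOffset)
Interval searchable = new Interval(firstStart, lastEnd.minus(skipOffset));
// -> 2021-01-01T00:00:00.000Z/2021-01-30T00:00:00.000Z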

Example 55 with Granularity

Use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.

Class NewestSegmentFirstIterator, method needsCompaction:

private boolean needsCompaction(DataSourceCompactionConfig config, SegmentsToCompact candidates) {
    Preconditions.checkState(!candidates.isEmpty(), "Empty candidates");
    final ClientCompactionTaskQueryTuningConfig tuningConfig = ClientCompactionTaskQueryTuningConfig.from(config.getTuningConfig(), config.getMaxRowsPerSegment());
    final PartitionsSpec partitionsSpecFromConfig = findPartitionsSpecFromConfig(tuningConfig);
    final CompactionState lastCompactionState = candidates.segments.get(0).getLastCompactionState();
    if (lastCompactionState == null) {
        log.info("Candidate segment[%s] is not compacted yet. Needs compaction.", candidates.segments.get(0).getId());
        return true;
    }
    final boolean allCandidatesHaveSameLastCompactionState = candidates.segments
        .stream()
        .allMatch(segment -> lastCompactionState.equals(segment.getLastCompactionState()));
    if (!allCandidatesHaveSameLastCompactionState) {
        log.info("[%s] Candidate segments were compacted with different partitions spec. Needs compaction.", candidates.segments.size());
        log.debugSegments(candidates.segments, "Candidate segments compacted with different partiton spec");
        return true;
    }
    final PartitionsSpec segmentPartitionsSpec = lastCompactionState.getPartitionsSpec();
    final IndexSpec segmentIndexSpec = objectMapper.convertValue(lastCompactionState.getIndexSpec(), IndexSpec.class);
    final IndexSpec configuredIndexSpec;
    if (tuningConfig.getIndexSpec() == null) {
        configuredIndexSpec = new IndexSpec();
    } else {
        configuredIndexSpec = tuningConfig.getIndexSpec();
    }
    if (!Objects.equals(partitionsSpecFromConfig, segmentPartitionsSpec)) {
        log.info("Configured partitionsSpec[%s] is differenet from " + "the partitionsSpec[%s] of segments. Needs compaction.", partitionsSpecFromConfig, segmentPartitionsSpec);
        return true;
    }
    // segmentIndexSpec cannot be null.
    if (!segmentIndexSpec.equals(configuredIndexSpec)) {
        log.info("Configured indexSpec[%s] is different from the one[%s] of segments. Needs compaction", configuredIndexSpec, segmentIndexSpec);
        return true;
    }
    if (config.getGranularitySpec() != null) {
        final ClientCompactionTaskGranularitySpec existingGranularitySpec =
            lastCompactionState.getGranularitySpec() != null
            ? objectMapper.convertValue(lastCompactionState.getGranularitySpec(), ClientCompactionTaskGranularitySpec.class)
            : null;
        // Checks for segmentGranularity
        if (config.getGranularitySpec().getSegmentGranularity() != null) {
            final Granularity existingSegmentGranularity = existingGranularitySpec != null ? existingGranularitySpec.getSegmentGranularity() : null;
            if (existingSegmentGranularity == null) {
                // Candidate segments were all compacted without a segment granularity set.
                // Check whether every segment interval is aligned with the configured segment granularity.
                boolean needsCompaction = candidates.segments.stream()
                    .anyMatch(segment -> !config.getGranularitySpec().getSegmentGranularity().isAligned(segment.getInterval()));
                if (needsCompaction) {
                    log.info("Segments were previously compacted but without segmentGranularity in auto compaction." + " Configured segmentGranularity[%s] is different from granularity implied by segment intervals. Needs compaction", config.getGranularitySpec().getSegmentGranularity());
                    return true;
                }
            } else if (!config.getGranularitySpec().getSegmentGranularity().equals(existingSegmentGranularity)) {
                log.info("Configured segmentGranularity[%s] is different from the segmentGranularity[%s] of segments. Needs compaction", config.getGranularitySpec().getSegmentGranularity(), existingSegmentGranularity);
                return true;
            }
        }
        // Checks for rollup
        if (config.getGranularitySpec().isRollup() != null) {
            final Boolean existingRollup = existingGranularitySpec != null ? existingGranularitySpec.isRollup() : null;
            if (existingRollup == null || !config.getGranularitySpec().isRollup().equals(existingRollup)) {
                log.info("Configured rollup[%s] is different from the rollup[%s] of segments. Needs compaction", config.getGranularitySpec().isRollup(), existingRollup);
                return true;
            }
        }
        // Checks for queryGranularity
        if (config.getGranularitySpec().getQueryGranularity() != null) {
            final Granularity existingQueryGranularity = existingGranularitySpec != null ? existingGranularitySpec.getQueryGranularity() : null;
            if (!config.getGranularitySpec().getQueryGranularity().equals(existingQueryGranularity)) {
                log.info("Configured queryGranularity[%s] is different from the queryGranularity[%s] of segments. Needs compaction", config.getGranularitySpec().getQueryGranularity(), existingQueryGranularity);
                return true;
            }
        }
    }
    if (config.getDimensionsSpec() != null) {
        final DimensionsSpec existingDimensionsSpec = lastCompactionState.getDimensionsSpec();
        // Checks for list of dimensions
        if (config.getDimensionsSpec().getDimensions() != null) {
            final List<DimensionSchema> existingDimensions = existingDimensionsSpec != null ? existingDimensionsSpec.getDimensions() : null;
            if (!config.getDimensionsSpec().getDimensions().equals(existingDimensions)) {
                log.info("Configured dimensionsSpec is different from the dimensionsSpec of segments. Needs compaction");
                return true;
            }
        }
    }
    if (config.getTransformSpec() != null) {
        final ClientCompactionTaskTransformSpec existingTransformSpec =
            lastCompactionState.getTransformSpec() != null
            ? objectMapper.convertValue(lastCompactionState.getTransformSpec(), ClientCompactionTaskTransformSpec.class)
            : null;
        // Checks for filters
        if (config.getTransformSpec().getFilter() != null) {
            final DimFilter existingFilters = existingTransformSpec != null ? existingTransformSpec.getFilter() : null;
            if (!config.getTransformSpec().getFilter().equals(existingFilters)) {
                log.info("Configured filter[%s] is different from the filter[%s] of segments. Needs compaction", config.getTransformSpec().getFilter(), existingFilters);
                return true;
            }
        }
    }
    if (ArrayUtils.isNotEmpty(config.getMetricsSpec())) {
        final AggregatorFactory[] existingMetricsSpec =
            lastCompactionState.getMetricsSpec() == null || lastCompactionState.getMetricsSpec().isEmpty()
            ? null
            : objectMapper.convertValue(lastCompactionState.getMetricsSpec(), AggregatorFactory[].class);
        if (existingMetricsSpec == null || !Arrays.deepEquals(config.getMetricsSpec(), existingMetricsSpec)) {
            log.info("Configured metricsSpec[%s] is different from the metricsSpec[%s] of segments. Needs compaction", Arrays.toString(config.getMetricsSpec()), Arrays.toString(existingMetricsSpec));
            return true;
        }
    }
    return false;
}
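The segmentGranularity check above relies on Granularity.isAligned(Interval), which is true only when the interval is exactly one bucket of the granularity. A short sketch with assumed example intervals:

Granularity day = Granularities.DAY;
day.isAligned(Intervals.of("2021-01-01/2021-01-02"));       // true: exactly one DAY bucket
day.isAligned(Intervals.of("2021-01-01T06:00/2021-01-02")); // false: start is not bucket-aligned
day.isAligned(Intervals.of("2021-01-01/2021-01-03"));       // false: spans two buckets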
