Example 21 with Granularity

use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.

the class CompactionTask method decideRollupAndQueryGranularityCarryOver.

/**
 * Decide which rollup & queryGranularity values to propagate for the compacted segment,
 * based on the data segments given
 *
 * @param rollup                    Reference to update with the rollup value
 * @param queryGranularity          Reference to update with the queryGranularity value
 * @param queryableIndexAndSegments The segments to compact
 */
private static void decideRollupAndQueryGranularityCarryOver(SettableSupplier<Boolean> rollup, SettableSupplier<Granularity> queryGranularity, List<NonnullPair<QueryableIndex, DataSegment>> queryableIndexAndSegments) {
    final SettableSupplier<Boolean> rollupIsValid = new SettableSupplier<>(true);
    for (NonnullPair<QueryableIndex, DataSegment> pair : queryableIndexAndSegments) {
        final QueryableIndex index = pair.lhs;
        if (index.getMetadata() == null) {
            throw new RE("Index metadata doesn't exist for segment[%s]", pair.rhs.getId());
        }
        // Carry over the rollup value only if all segments being compacted share the same non-null value; otherwise set it to false
        if (rollupIsValid.get()) {
            Boolean isRollup = index.getMetadata().isRollup();
            if (isRollup == null) {
                rollupIsValid.set(false);
                rollup.set(false);
            } else if (rollup.get() == null) {
                rollup.set(isRollup);
            } else if (!rollup.get().equals(isRollup.booleanValue())) {
                rollupIsValid.set(false);
                rollup.set(false);
            }
        }
        // Pick the finer of the non-null query granularities of the segments being compacted
        Granularity current = index.getMetadata().getQueryGranularity();
        queryGranularity.set(compareWithCurrent(queryGranularity.get(), current));
    }
}
Also used : SettableSupplier(org.apache.druid.common.guava.SettableSupplier) RE(org.apache.druid.java.util.common.RE) QueryableIndex(org.apache.druid.segment.QueryableIndex) LockGranularity(org.apache.druid.indexing.common.LockGranularity) Granularity(org.apache.druid.java.util.common.granularity.Granularity) DataSegment(org.apache.druid.timeline.DataSegment)
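
The compareWithCurrent helper called on the last line of the method is not shown on this page. Below is a minimal, self-contained sketch of the carry-over rule the surrounding comment describes, picking the finer of two possibly-null granularities. The class and method names are illustrative, and the use of Granularity.IS_FINER_THAN (finer sorts first) is an assumption based on similar call sites, not the original source.

import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;

public class FinerGranularityDemo {
    // Returns the finer of the two granularities; a null never replaces a
    // non-null value once one has been seen.
    static Granularity finerOf(Granularity accumulated, Granularity current) {
        if (accumulated == null) {
            return current;
        }
        if (current != null && Granularity.IS_FINER_THAN.compare(current, accumulated) < 0) {
            return current;
        }
        return accumulated;
    }

    public static void main(String[] args) {
        Granularity g = null;
        g = finerOf(g, Granularities.DAY);
        g = finerOf(g, Granularities.MINUTE);
        g = finerOf(g, null);
        // Prints the minute granularity: the finest non-null value wins.
        System.out.println(g);
    }
}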

Example 22 with Granularity

use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.

the class CompactionTask method createIngestionSchema.

/**
 * Generate {@link ParallelIndexIngestionSpec} from input segments.
 *
 * @return an empty list if the input segments don't exist; otherwise, a list of generated ingestion specs.
 */
@VisibleForTesting
static List<ParallelIndexIngestionSpec> createIngestionSchema(final TaskToolbox toolbox, final LockGranularity lockGranularityInUse, final SegmentProvider segmentProvider, final PartitionConfigurationManager partitionConfigurationManager, @Nullable final DimensionsSpec dimensionsSpec, @Nullable final ClientCompactionTaskTransformSpec transformSpec, @Nullable final AggregatorFactory[] metricsSpec, @Nullable final ClientCompactionTaskGranularitySpec granularitySpec, final CoordinatorClient coordinatorClient, final SegmentCacheManagerFactory segmentCacheManagerFactory, final RetryPolicyFactory retryPolicyFactory, final boolean dropExisting) throws IOException, SegmentLoadingException {
    NonnullPair<Map<DataSegment, File>, List<TimelineObjectHolder<String, DataSegment>>> pair = prepareSegments(toolbox, segmentProvider, lockGranularityInUse);
    final Map<DataSegment, File> segmentFileMap = pair.lhs;
    final List<TimelineObjectHolder<String, DataSegment>> timelineSegments = pair.rhs;
    if (timelineSegments.isEmpty()) {
        return Collections.emptyList();
    }
    // find metadata for interval
    // queryableIndexAndSegments is sorted by the interval of the dataSegment
    final List<NonnullPair<QueryableIndex, DataSegment>> queryableIndexAndSegments = loadSegments(timelineSegments, segmentFileMap, toolbox.getIndexIO());
    final CompactionTuningConfig compactionTuningConfig = partitionConfigurationManager.computeTuningConfig();
    if (granularitySpec == null || granularitySpec.getSegmentGranularity() == null) {
        // original granularity
        final Map<Interval, List<NonnullPair<QueryableIndex, DataSegment>>> intervalToSegments = new TreeMap<>(Comparators.intervalsByStartThenEnd());
        queryableIndexAndSegments.forEach(p -> intervalToSegments.computeIfAbsent(p.rhs.getInterval(), k -> new ArrayList<>()).add(p));
        // merge overlapping intervals so that overlapping segments are compacted in the same ingestion spec
        List<NonnullPair<Interval, List<NonnullPair<QueryableIndex, DataSegment>>>> intervalToSegmentsUnified = new ArrayList<>();
        Interval union = null;
        List<NonnullPair<QueryableIndex, DataSegment>> segments = new ArrayList<>();
        for (Entry<Interval, List<NonnullPair<QueryableIndex, DataSegment>>> entry : intervalToSegments.entrySet()) {
            Interval cur = entry.getKey();
            if (union == null) {
                union = cur;
                segments.addAll(entry.getValue());
            } else if (union.overlaps(cur)) {
                union = Intervals.utc(union.getStartMillis(), Math.max(union.getEndMillis(), cur.getEndMillis()));
                segments.addAll(entry.getValue());
            } else {
                intervalToSegmentsUnified.add(new NonnullPair<>(union, segments));
                union = cur;
                segments = new ArrayList<>(entry.getValue());
            }
        }
        intervalToSegmentsUnified.add(new NonnullPair<>(union, segments));
        final List<ParallelIndexIngestionSpec> specs = new ArrayList<>(intervalToSegmentsUnified.size());
        for (NonnullPair<Interval, List<NonnullPair<QueryableIndex, DataSegment>>> entry : intervalToSegmentsUnified) {
            final Interval interval = entry.lhs;
            final List<NonnullPair<QueryableIndex, DataSegment>> segmentsToCompact = entry.rhs;
            // If granularitySpec is not null, set its segmentGranularity. Otherwise,
            // create a new granularitySpec with this segmentGranularity.
            Granularity segmentGranularityToUse = GranularityType.fromPeriod(interval.toPeriod()).getDefaultGranularity();
            final DataSchema dataSchema = createDataSchema(segmentProvider.dataSource, segmentsToCompact, dimensionsSpec, transformSpec, metricsSpec, granularitySpec == null ? new ClientCompactionTaskGranularitySpec(segmentGranularityToUse, null, null) : granularitySpec.withSegmentGranularity(segmentGranularityToUse));
            specs.add(new ParallelIndexIngestionSpec(dataSchema, createIoConfig(toolbox, dataSchema, interval, coordinatorClient, segmentCacheManagerFactory, retryPolicyFactory, dropExisting), compactionTuningConfig));
        }
        return specs;
    } else {
        // given segment granularity
        final DataSchema dataSchema = createDataSchema(segmentProvider.dataSource, queryableIndexAndSegments, dimensionsSpec, transformSpec, metricsSpec, granularitySpec);
        return Collections.singletonList(new ParallelIndexIngestionSpec(dataSchema, createIoConfig(toolbox, dataSchema, segmentProvider.interval, coordinatorClient, segmentCacheManagerFactory, retryPolicyFactory, dropExisting), compactionTuningConfig));
    }
}
Also used : ArrayList(java.util.ArrayList) LockGranularity(org.apache.druid.indexing.common.LockGranularity) Granularity(org.apache.druid.java.util.common.granularity.Granularity) DataSegment(org.apache.druid.timeline.DataSegment) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) NonnullPair(org.apache.druid.java.util.common.NonnullPair) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) TreeMap(java.util.TreeMap) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) DataSchema(org.apache.druid.segment.indexing.DataSchema) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) QueryableIndex(org.apache.druid.segment.QueryableIndex) Map(java.util.Map) BiMap(com.google.common.collect.BiMap) HashMap(java.util.HashMap) HashBiMap(com.google.common.collect.HashBiMap) TreeMap(java.util.TreeMap) File(java.io.File) Interval(org.joda.time.Interval) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
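
The interval-unification loop in the middle of the method can be read in isolation. The sketch below reproduces the same merge over plain Joda intervals, assuming a start-sorted input as the TreeMap above guarantees; the class and method names are illustrative, not from Druid.

import java.util.ArrayList;
import java.util.List;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;

public class IntervalMergeDemo {
    // Merges a start-sorted list of intervals so that any two overlapping
    // inputs land in the same output interval.
    static List<Interval> mergeOverlapping(List<Interval> sorted) {
        final List<Interval> merged = new ArrayList<>();
        Interval union = null;
        for (Interval cur : sorted) {
            if (union == null) {
                union = cur;
            } else if (union.overlaps(cur)) {
                union = new Interval(union.getStartMillis(), Math.max(union.getEndMillis(), cur.getEndMillis()), DateTimeZone.UTC);
            } else {
                merged.add(union);
                union = cur;
            }
        }
        if (union != null) {
            merged.add(union);
        }
        return merged;
    }

    public static void main(String[] args) {
        List<Interval> input = new ArrayList<>();
        input.add(Interval.parse("2020-01-01/2020-01-03"));
        // Overlaps the first interval, so the two are unified.
        input.add(Interval.parse("2020-01-02/2020-01-04"));
        // Disjoint, so it stays separate.
        input.add(Interval.parse("2020-01-10/2020-01-11"));
        // Prints two merged intervals covering 2020-01-01/2020-01-04 and 2020-01-10/2020-01-11.
        System.out.println(mergeOverlapping(input));
    }
}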

Example 23 with Granularity

use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.

the class CompactionTask method createDataSchema.

private static DataSchema createDataSchema(String dataSource, List<NonnullPair<QueryableIndex, DataSegment>> queryableIndexAndSegments, @Nullable DimensionsSpec dimensionsSpec, @Nullable ClientCompactionTaskTransformSpec transformSpec, @Nullable AggregatorFactory[] metricsSpec, @Nonnull ClientCompactionTaskGranularitySpec granularitySpec) {
    // Check the index metadata and decide which rollup & queryGranularity values to propagate (i.e. carry over)
    final SettableSupplier<Boolean> rollup = new SettableSupplier<>();
    final SettableSupplier<Granularity> queryGranularity = new SettableSupplier<>();
    decideRollupAndQueryGranularityCarryOver(rollup, queryGranularity, queryableIndexAndSegments);
    final Interval totalInterval = JodaUtils.umbrellaInterval(queryableIndexAndSegments.stream().map(p -> p.rhs.getInterval()).collect(Collectors.toList()));
    final Granularity queryGranularityToUse;
    if (granularitySpec.getQueryGranularity() == null) {
        queryGranularityToUse = queryGranularity.get();
        log.info("Generate compaction task spec with segments original query granularity [%s]", queryGranularityToUse);
    } else {
        queryGranularityToUse = granularitySpec.getQueryGranularity();
        log.info("Generate compaction task spec with new query granularity overrided from input [%s]", queryGranularityToUse);
    }
    final GranularitySpec uniformGranularitySpec = new UniformGranularitySpec(Preconditions.checkNotNull(granularitySpec.getSegmentGranularity()), queryGranularityToUse, granularitySpec.isRollup() == null ? rollup.get() : granularitySpec.isRollup(), Collections.singletonList(totalInterval));
    // find unique dimensions
    final DimensionsSpec finalDimensionsSpec = dimensionsSpec == null ? createDimensionsSpec(queryableIndexAndSegments) : dimensionsSpec;
    final AggregatorFactory[] finalMetricsSpec = metricsSpec == null ? createMetricsSpec(queryableIndexAndSegments) : metricsSpec;
    return new DataSchema(dataSource, new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, "millis", null), finalDimensionsSpec, finalMetricsSpec, uniformGranularitySpec, transformSpec == null ? null : new TransformSpec(transformSpec.getFilter(), null));
}
Also used : LockGranularity(org.apache.druid.indexing.common.LockGranularity) Granularity(org.apache.druid.java.util.common.granularity.Granularity) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TransformSpec(org.apache.druid.segment.transform.TransformSpec) ClientCompactionTaskTransformSpec(org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Interval(org.joda.time.Interval)
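
JodaUtils.umbrellaInterval above computes the smallest interval covering all of the segment intervals. The sketch below shows the same computation over plain Joda intervals; it is an illustration of the behavior, not the Druid implementation, and it assumes a non-empty input.

import java.util.Arrays;
import java.util.List;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;

public class UmbrellaIntervalDemo {
    // Returns the smallest single interval covering every input interval.
    static Interval umbrella(List<Interval> intervals) {
        long start = Long.MAX_VALUE;
        long end = Long.MIN_VALUE;
        for (Interval i : intervals) {
            start = Math.min(start, i.getStartMillis());
            end = Math.max(end, i.getEndMillis());
        }
        return new Interval(start, end, DateTimeZone.UTC);
    }

    public static void main(String[] args) {
        List<Interval> segments = Arrays.asList(
            Interval.parse("2020-01-05/2020-01-06"),
            Interval.parse("2020-01-01/2020-01-02")
        );
        // Prints the covering interval 2020-01-01/2020-01-06.
        System.out.println(umbrella(segments));
    }
}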

Example 24 with Granularity

use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.

the class AbstractBatchIndexTask method determineSegmentGranularity.

private LockGranularityDetermineResult determineSegmentGranularity(List<DataSegment> segments) {
    if (segments.isEmpty()) {
        log.info("Using segment lock for empty segments");
        // Note that we should get any lock before data ingestion if we are supposed to use a timeChunk lock.
        return new LockGranularityDetermineResult(LockGranularity.SEGMENT, null, Collections.emptyList());
    }
    if (requireLockExistingSegments()) {
        final Granularity granularityFromSegments = findGranularityFromSegments(segments);
        @Nullable final Granularity segmentGranularityFromSpec = getSegmentGranularity();
        final List<Interval> intervals = segments.stream().map(DataSegment::getInterval).collect(Collectors.toList());
        if (granularityFromSegments == null || segmentGranularityFromSpec != null && (!granularityFromSegments.equals(segmentGranularityFromSpec) || segments.stream().anyMatch(segment -> !segmentGranularityFromSpec.isAligned(segment.getInterval())))) {
            // This case is one of the following:
            // 1) Segments have different granularities.
            // 2) The segment granularity in the ingestion spec differs from that of the existing segments.
            // 3) Some existing segments are not aligned with the segment granularity in the ingestion spec.
            log.info("Detected segmentGranularity change. Using timeChunk lock");
            return new LockGranularityDetermineResult(LockGranularity.TIME_CHUNK, intervals, null);
        } else {
            // Use segment lock
            // Create a timeline to find latest segments only
            final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(segments);
            Set<DataSegment> segmentsToLock = timeline.findNonOvershadowedObjectsInInterval(JodaUtils.umbrellaInterval(intervals), Partitions.ONLY_COMPLETE);
            log.info("No segmentGranularity change detected and it's not perfect rollup. Using segment lock");
            return new LockGranularityDetermineResult(LockGranularity.SEGMENT, null, new ArrayList<>(segmentsToLock));
        }
    } else {
        // Set useSegmentLock even though we don't get any locks.
        // Note that we should get any lock before data ingestion if we are supposed to use a timeChunk lock.
        log.info("Using segment lock since we don't have to lock existing segments");
        return new LockGranularityDetermineResult(LockGranularity.SEGMENT, null, Collections.emptyList());
    }
}
Also used : LockGranularity(org.apache.druid.indexing.common.LockGranularity) Granularity(org.apache.druid.java.util.common.granularity.Granularity) IntervalsByGranularity(org.apache.druid.java.util.common.granularity.IntervalsByGranularity) DataSegment(org.apache.druid.timeline.DataSegment) Nullable(javax.annotation.Nullable) Interval(org.joda.time.Interval)
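
The isAligned check in the branch above is what forces the fall-back to a time-chunk lock. A small usage sketch, assuming isAligned returns true exactly when the interval matches a single bucket of the granularity; the interval values are illustrative.

import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;

public class AlignmentDemo {
    public static void main(String[] args) {
        // A whole-day interval lines up with a DAY bucket, so no change is detected.
        System.out.println(Granularities.DAY.isAligned(Intervals.of("2020-01-01/2020-01-02")));
        // A half-day interval does not, so the task above would switch to a
        // time-chunk lock for segments like this one.
        System.out.println(Granularities.DAY.isAligned(Intervals.of("2020-01-01/2020-01-01T12:00")));
    }
}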

Example 25 with Granularity

use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.

the class SegmentAllocateActionTest method testSameIntervalWithSegmentGranularity.

@Test
public void testSameIntervalWithSegmentGranularity() {
    final Task task = NoopTask.create();
    taskActionTestKit.getTaskLockbox().add(task);
    Granularity segmentGranularity = new PeriodGranularity(Period.hours(1), null, DateTimes.inferTzFromString("Asia/Shanghai"));
    final SegmentIdWithShardSpec id1 = allocate(task, PARTY_TIME, Granularities.MINUTE, segmentGranularity, "s1", null);
    final SegmentIdWithShardSpec id2 = allocate(task, PARTY_TIME, Granularities.MINUTE, segmentGranularity, "s2", null);
    Assert.assertNotNull(id1);
    Assert.assertNotNull(id2);
}
Also used : Task(org.apache.druid.indexing.common.task.Task) NoopTask(org.apache.druid.indexing.common.task.NoopTask) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) Granularity(org.apache.druid.java.util.common.granularity.Granularity) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) LockGranularity(org.apache.druid.indexing.common.LockGranularity) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) Test(org.junit.Test)
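
The timezone-aware granularity in this test can also be exercised directly. Below is a minimal sketch showing how the Asia/Shanghai hourly granularity buckets a timestamp; the class name and example timestamp are illustrative.

import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.java.util.common.granularity.PeriodGranularity;
import org.joda.time.Period;

public class TimezoneGranularityDemo {
    public static void main(String[] args) {
        // Hourly buckets computed in the Asia/Shanghai timezone, as in the test.
        Granularity hourly = new PeriodGranularity(Period.hours(1), null, DateTimes.inferTzFromString("Asia/Shanghai"));
        // bucketStart truncates the instant to the start of its hour bucket,
        // rendered in the granularity's timezone (UTC+8).
        System.out.println(hourly.bucketStart(DateTimes.of("2020-01-01T03:25:00Z")));
    }
}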

Aggregations

Granularity (org.apache.druid.java.util.common.granularity.Granularity): 58
Interval (org.joda.time.Interval): 27
ArrayList (java.util.ArrayList): 22
DateTime (org.joda.time.DateTime): 19
Test (org.junit.Test): 16
List (java.util.List): 14
Map (java.util.Map): 14
HashMap (java.util.HashMap): 13
Nullable (javax.annotation.Nullable): 12
PeriodGranularity (org.apache.druid.java.util.common.granularity.PeriodGranularity): 12
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 12
Period (org.joda.time.Period): 11
ISE (org.apache.druid.java.util.common.ISE): 8
Result (org.apache.druid.query.Result): 8
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 7
ImmutableList (com.google.common.collect.ImmutableList): 7
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 6
ClientCompactionTaskGranularitySpec (org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec): 6
LockGranularity (org.apache.druid.indexing.common.LockGranularity): 6
Sequence (org.apache.druid.java.util.common.guava.Sequence): 6