use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.
the class CompactionTask method decideRollupAndQueryGranularityCarryOver.
/**
* Decide which rollup and queryGranularity values to propagate for the compacted segment,
* based on the given data segments.
*
* @param rollup Reference to update with the rollup value
* @param queryGranularity Reference to update with the queryGranularity value
* @param queryableIndexAndSegments The segments to compact
*/
private static void decideRollupAndQueryGranularityCarryOver(
    SettableSupplier<Boolean> rollup,
    SettableSupplier<Granularity> queryGranularity,
    List<NonnullPair<QueryableIndex, DataSegment>> queryableIndexAndSegments
)
{
final SettableSupplier<Boolean> rollupIsValid = new SettableSupplier<>(true);
for (NonnullPair<QueryableIndex, DataSegment> pair : queryableIndexAndSegments) {
final QueryableIndex index = pair.lhs;
if (index.getMetadata() == null) {
throw new RE("Index metadata doesn't exist for segment[%s]", pair.rhs.getId());
}
// Pick the rollup value if all segments being compacted share the same non-null value; otherwise set it to false.
if (rollupIsValid.get()) {
Boolean isRollup = index.getMetadata().isRollup();
if (isRollup == null) {
rollupIsValid.set(false);
rollup.set(false);
} else if (rollup.get() == null) {
rollup.set(isRollup);
} else if (!rollup.get().equals(isRollup.booleanValue())) {
rollupIsValid.set(false);
rollup.set(false);
}
}
// Pick the finest non-null query granularity among the segments being compacted.
Granularity current = index.getMetadata().getQueryGranularity();
queryGranularity.set(compareWithCurrent(queryGranularity.get(), current));
}
}
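The compareWithCurrent helper called above does not appear on this page. A minimal sketch of its likely shape (an assumption based on the surrounding logic, using Druid's Granularity.IS_FINER_THAN comparator): keep the finer of the two granularities, and never fall back to null once a non-null value has been seen.
// Hedged sketch of compareWithCurrent: prefer the finer granularity.
static Granularity compareWithCurrent(Granularity queryGranularity, Granularity current)
{
  if (queryGranularity == null && current != null) {
    queryGranularity = current;
  } else if (queryGranularity != null && current != null
      && Granularity.IS_FINER_THAN.compare(current, queryGranularity) < 0) {
    queryGranularity = current;
  }
  // Never propagate null once at least one non-null granularity has been seen.
  return queryGranularity;
}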
use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.
the class CompactionTask method createIngestionSchema.
/**
* Generate {@link ParallelIndexIngestionSpec} from input segments.
*
* @return an empty list if no input segments exist; otherwise, the generated ingestion specs.
*/
@VisibleForTesting
static List<ParallelIndexIngestionSpec> createIngestionSchema(
    final TaskToolbox toolbox,
    final LockGranularity lockGranularityInUse,
    final SegmentProvider segmentProvider,
    final PartitionConfigurationManager partitionConfigurationManager,
    @Nullable final DimensionsSpec dimensionsSpec,
    @Nullable final ClientCompactionTaskTransformSpec transformSpec,
    @Nullable final AggregatorFactory[] metricsSpec,
    @Nullable final ClientCompactionTaskGranularitySpec granularitySpec,
    final CoordinatorClient coordinatorClient,
    final SegmentCacheManagerFactory segmentCacheManagerFactory,
    final RetryPolicyFactory retryPolicyFactory,
    final boolean dropExisting
) throws IOException, SegmentLoadingException
{
NonnullPair<Map<DataSegment, File>, List<TimelineObjectHolder<String, DataSegment>>> pair = prepareSegments(toolbox, segmentProvider, lockGranularityInUse);
final Map<DataSegment, File> segmentFileMap = pair.lhs;
final List<TimelineObjectHolder<String, DataSegment>> timelineSegments = pair.rhs;
if (timelineSegments.isEmpty()) {
return Collections.emptyList();
}
// find metadata for interval
// queryableIndexAndSegments is sorted by the interval of the dataSegment
final List<NonnullPair<QueryableIndex, DataSegment>> queryableIndexAndSegments = loadSegments(timelineSegments, segmentFileMap, toolbox.getIndexIO());
final CompactionTuningConfig compactionTuningConfig = partitionConfigurationManager.computeTuningConfig();
if (granularitySpec == null || granularitySpec.getSegmentGranularity() == null) {
// original granularity
final Map<Interval, List<NonnullPair<QueryableIndex, DataSegment>>> intervalToSegments = new TreeMap<>(Comparators.intervalsByStartThenEnd());
queryableIndexAndSegments.forEach(p -> intervalToSegments.computeIfAbsent(p.rhs.getInterval(), k -> new ArrayList<>()).add(p));
// Unify overlapping intervals so that overlapping segments are compacted in the same ingestionSpec.
List<NonnullPair<Interval, List<NonnullPair<QueryableIndex, DataSegment>>>> intervalToSegmentsUnified = new ArrayList<>();
Interval union = null;
List<NonnullPair<QueryableIndex, DataSegment>> segments = new ArrayList<>();
for (Entry<Interval, List<NonnullPair<QueryableIndex, DataSegment>>> entry : intervalToSegments.entrySet()) {
Interval cur = entry.getKey();
if (union == null) {
union = cur;
segments.addAll(entry.getValue());
} else if (union.overlaps(cur)) {
union = Intervals.utc(union.getStartMillis(), Math.max(union.getEndMillis(), cur.getEndMillis()));
segments.addAll(entry.getValue());
} else {
intervalToSegmentsUnified.add(new NonnullPair<>(union, segments));
union = cur;
segments = new ArrayList<>(entry.getValue());
}
}
intervalToSegmentsUnified.add(new NonnullPair<>(union, segments));
final List<ParallelIndexIngestionSpec> specs = new ArrayList<>(intervalToSegmentsUnified.size());
for (NonnullPair<Interval, List<NonnullPair<QueryableIndex, DataSegment>>> entry : intervalToSegmentsUnified) {
final Interval interval = entry.lhs;
final List<NonnullPair<QueryableIndex, DataSegment>> segmentsToCompact = entry.rhs;
// If granularitySpec is not null, set the computed segmentGranularity on it.
// Otherwise, create a new granularitySpec carrying that segmentGranularity.
Granularity segmentGranularityToUse = GranularityType.fromPeriod(interval.toPeriod()).getDefaultGranularity();
final DataSchema dataSchema = createDataSchema(
    segmentProvider.dataSource,
    segmentsToCompact,
    dimensionsSpec,
    transformSpec,
    metricsSpec,
    granularitySpec == null
        ? new ClientCompactionTaskGranularitySpec(segmentGranularityToUse, null, null)
        : granularitySpec.withSegmentGranularity(segmentGranularityToUse)
);
specs.add(
    new ParallelIndexIngestionSpec(
        dataSchema,
        createIoConfig(toolbox, dataSchema, interval, coordinatorClient, segmentCacheManagerFactory, retryPolicyFactory, dropExisting),
        compactionTuningConfig
    )
);
}
return specs;
} else {
// given segment granularity
final DataSchema dataSchema = createDataSchema(
    segmentProvider.dataSource,
    queryableIndexAndSegments,
    dimensionsSpec,
    transformSpec,
    metricsSpec,
    granularitySpec
);
return Collections.singletonList(
    new ParallelIndexIngestionSpec(
        dataSchema,
        createIoConfig(toolbox, dataSchema, segmentProvider.interval, coordinatorClient, segmentCacheManagerFactory, retryPolicyFactory, dropExisting),
        compactionTuningConfig
    )
);
}
}
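To make the interval-unification loop concrete, here is a standalone sketch (the dates are illustrative, not from the source) showing why overlapping intervals collapse into one spec while a disjoint interval starts a new group:
Interval a = Intervals.of("2020-01-01/2020-01-03");
Interval b = Intervals.of("2020-01-02/2020-01-04"); // overlaps a, so it joins a's group
Interval c = Intervals.of("2020-01-10/2020-01-11"); // disjoint, so it starts a new group
Interval union = Intervals.utc(a.getStartMillis(), Math.max(a.getEndMillis(), b.getEndMillis()));
// union is 2020-01-01/2020-01-04; the loop therefore emits two entries and
// two ParallelIndexIngestionSpecs for these three intervals, not three.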
use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.
the class CompactionTask method createDataSchema.
private static DataSchema createDataSchema(
    String dataSource,
    List<NonnullPair<QueryableIndex, DataSegment>> queryableIndexAndSegments,
    @Nullable DimensionsSpec dimensionsSpec,
    @Nullable ClientCompactionTaskTransformSpec transformSpec,
    @Nullable AggregatorFactory[] metricsSpec,
    @Nonnull ClientCompactionTaskGranularitySpec granularitySpec
)
{
// Check index metadata and decide which rollup and queryGranularity values to
// propagate (i.e. carry over) from the input segments.
final SettableSupplier<Boolean> rollup = new SettableSupplier<>();
final SettableSupplier<Granularity> queryGranularity = new SettableSupplier<>();
decideRollupAndQueryGranularityCarryOver(rollup, queryGranularity, queryableIndexAndSegments);
final Interval totalInterval = JodaUtils.umbrellaInterval(
    queryableIndexAndSegments.stream().map(p -> p.rhs.getInterval()).collect(Collectors.toList())
);
final Granularity queryGranularityToUse;
if (granularitySpec.getQueryGranularity() == null) {
queryGranularityToUse = queryGranularity.get();
log.info("Generate compaction task spec with segments original query granularity [%s]", queryGranularityToUse);
} else {
queryGranularityToUse = granularitySpec.getQueryGranularity();
log.info("Generate compaction task spec with new query granularity overrided from input [%s]", queryGranularityToUse);
}
final GranularitySpec uniformGranularitySpec = new UniformGranularitySpec(
    Preconditions.checkNotNull(granularitySpec.getSegmentGranularity()),
    queryGranularityToUse,
    granularitySpec.isRollup() == null ? rollup.get() : granularitySpec.isRollup(),
    Collections.singletonList(totalInterval)
);
// find unique dimensions
final DimensionsSpec finalDimensionsSpec = dimensionsSpec == null ? createDimensionsSpec(queryableIndexAndSegments) : dimensionsSpec;
final AggregatorFactory[] finalMetricsSpec = metricsSpec == null ? createMetricsSpec(queryableIndexAndSegments) : metricsSpec;
return new DataSchema(
    dataSource,
    new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, "millis", null),
    finalDimensionsSpec,
    finalMetricsSpec,
    uniformGranularitySpec,
    transformSpec == null ? null : new TransformSpec(transformSpec.getFilter(), null)
);
}
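A hedged usage sketch of the fallback rules above: any value pinned in the ClientCompactionTaskGranularitySpec wins, and each null field falls back to what was carried over from the input segments.
// Only segmentGranularity is pinned; queryGranularity and rollup are left null,
// so createDataSchema falls back to the finest query granularity found in the
// input segments and to their shared rollup flag (false if they disagree).
ClientCompactionTaskGranularitySpec spec =
    new ClientCompactionTaskGranularitySpec(Granularities.DAY, null, null);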
use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.
the class AbstractBatchIndexTask method determineSegmentGranularity.
private LockGranularityDetermineResult determineSegmentGranularity(List<DataSegment> segments) {
if (segments.isEmpty()) {
log.info("Using segment lock for empty segments");
// Note that we should get any lock before data ingestion if we are supposed to use timeChunk lock.
return new LockGranularityDetermineResult(LockGranularity.SEGMENT, null, Collections.emptyList());
}
if (requireLockExistingSegments()) {
final Granularity granularityFromSegments = findGranularityFromSegments(segments);
@Nullable final Granularity segmentGranularityFromSpec = getSegmentGranularity();
final List<Interval> intervals = segments.stream().map(DataSegment::getInterval).collect(Collectors.toList());
if (granularityFromSegments == null
    || (segmentGranularityFromSpec != null
        && (!granularityFromSegments.equals(segmentGranularityFromSpec)
            || segments.stream().anyMatch(segment -> !segmentGranularityFromSpec.isAligned(segment.getInterval()))))) {
// This case is one of the following:
// 1) Segments have different granularities.
// 2) The segment granularity in the ingestion spec differs from that of the existing segments.
// 3) Some existing segments are not aligned with the segment granularity in the ingestion spec.
log.info("Detected segmentGranularity change. Using timeChunk lock");
return new LockGranularityDetermineResult(LockGranularity.TIME_CHUNK, intervals, null);
} else {
// Use segment lock
// Create a timeline to find latest segments only
final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(segments);
Set<DataSegment> segmentsToLock = timeline.findNonOvershadowedObjectsInInterval(JodaUtils.umbrellaInterval(intervals), Partitions.ONLY_COMPLETE);
log.info("No segmentGranularity change detected and it's not perfect rollup. Using segment lock");
return new LockGranularityDetermineResult(LockGranularity.SEGMENT, null, new ArrayList<>(segmentsToLock));
}
} else {
// Set useSegmentLock even though we don't get any locks.
// Note that we should get any lock before data ingestion if we are supposed to use timeChunk lock.
log.info("Using segment lock since we don't have to lock existing segments");
return new LockGranularityDetermineResult(LockGranularity.SEGMENT, null, Collections.emptyList());
}
}
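The findGranularityFromSegments helper used above is also not shown on this page. A plausible minimal sketch (an assumption consistent with the branch logic): infer a standard granularity only when every segment interval maps to the same period, and return null otherwise.
@Nullable
static Granularity findGranularityFromSegments(List<DataSegment> segments)
{
  if (segments.isEmpty()) {
    return null;
  }
  final Period firstPeriod = segments.get(0).getInterval().toPeriod();
  final boolean allSame = segments.stream()
      .allMatch(segment -> firstPeriod.equals(segment.getInterval().toPeriod()));
  // Mixed granularities yield null, which forces the timeChunk-lock branch above.
  return allSame ? GranularityType.fromPeriod(firstPeriod).getDefaultGranularity() : null;
}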
use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.
the class SegmentAllocateActionTest method testSameIntervalWithSegmentGranularity.
@Test
public void testSameIntervalWithSegmentGranularity() {
final Task task = NoopTask.create();
taskActionTestKit.getTaskLockbox().add(task);
Granularity segmentGranularity = new PeriodGranularity(Period.hours(1), null, DateTimes.inferTzFromString("Asia/Shanghai"));
final SegmentIdWithShardSpec id1 = allocate(task, PARTY_TIME, Granularities.MINUTE, segmentGranularity, "s1", null);
final SegmentIdWithShardSpec id2 = allocate(task, PARTY_TIME, Granularities.MINUTE, segmentGranularity, "s2", null);
Assert.assertNotNull(id1);
Assert.assertNotNull(id2);
}
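For context, a standalone sketch (the timestamp is illustrative) of how a non-UTC PeriodGranularity buckets time: both allocations above target the same hour-long chunk, and the distinct sequence names s1 and s2 should yield distinct segment ids within it.
Granularity hourlyShanghai =
    new PeriodGranularity(Period.hours(1), null, DateTimes.inferTzFromString("Asia/Shanghai"));
// bucketStart aligns a timestamp to the start of its hour in the Asia/Shanghai zone.
DateTime bucketStart = hourlyShanghai.bucketStart(DateTimes.of("2013-01-01T04:30:00Z"));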