Search in sources:

Example 1 with RetrieveUsedSegmentsAction

use of org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction in project druid by druid-io.

the class AbstractBatchIndexTask method getUsedSegmentsWithinInterval.

/**
 * Collects the visible used segments of {@code dataSource} whose interval is fully contained in one of the
 * condensed forms of the given intervals.
 *
 * @param toolbox    toolbox whose task action client is used to retrieve the used segments
 * @param dataSource name of the datasource to look up
 * @param intervals  intervals of interest; when empty, no action is submitted and an empty set is returned
 * @return a mutable set of the segments lying entirely within one of the condensed intervals
 * @throws IOException if submitting the retrieve action fails
 */
public static Set<DataSegment> getUsedSegmentsWithinInterval(TaskToolbox toolbox, String dataSource, List<Interval> intervals) throws IOException {
    final Set<DataSegment> containedSegments = new HashSet<>();
    final List<Interval> mergedIntervals = JodaUtils.condenseIntervals(intervals);
    if (intervals.isEmpty()) {
        // Nothing to look up, so skip the round trip to the task action client.
        return containedSegments;
    }

    final RetrieveUsedSegmentsAction retrieveAction = new RetrieveUsedSegmentsAction(dataSource, null, mergedIntervals, Segments.ONLY_VISIBLE);
    final Collection<DataSegment> visibleSegments = toolbox.getTaskActionClient().submit(retrieveAction);
    for (DataSegment candidate : visibleSegments) {
        // Keep only segments fully covered by a single condensed interval; partial overlaps are skipped.
        final boolean fullyCovered = mergedIntervals.stream().anyMatch(merged -> merged.contains(candidate.getInterval()));
        if (fullyCovered) {
            containedSegments.add(candidate);
        }
    }
    return containedSegments;
}
Also used : RetrieveUsedSegmentsAction(org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction) DataSegment(org.apache.druid.timeline.DataSegment) HashSet(java.util.HashSet) Interval(org.joda.time.Interval)

Example 2 with RetrieveUsedSegmentsAction

use of org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction in project druid by druid-io.

the class ActionBasedUsedSegmentCheckerTest method testBasic.

/**
 * Verifies that the checker issues one retrieve action per datasource and keeps only the returned segments
 * whose ids were actually requested.
 */
@Test
public void testBasic() throws IOException {
    // Segments the mocked client reports as used for datasource "bar".
    final DataSegment barPartition0 = DataSegment.builder().dataSource("bar").interval(Intervals.of("2002/P1D")).shardSpec(new LinearShardSpec(0)).version("b").size(0).build();
    final DataSegment barPartition1 = DataSegment.builder().dataSource("bar").interval(Intervals.of("2002/P1D")).shardSpec(new LinearShardSpec(1)).version("b").size(0).build();

    // Segments the mocked client reports as used for datasource "foo".
    final DataSegment foo2000Partition0 = DataSegment.builder().dataSource("foo").interval(Intervals.of("2000/P1D")).shardSpec(new LinearShardSpec(0)).version("a").size(0).build();
    final DataSegment foo2000Partition1 = DataSegment.builder().dataSource("foo").interval(Intervals.of("2000/P1D")).shardSpec(new LinearShardSpec(1)).version("a").size(0).build();
    final DataSegment foo2001Partition1 = DataSegment.builder().dataSource("foo").interval(Intervals.of("2001/P1D")).shardSpec(new LinearShardSpec(1)).version("b").size(0).build();
    final DataSegment foo2002Partition1 = DataSegment.builder().dataSource("foo").interval(Intervals.of("2002/P1D")).shardSpec(new LinearShardSpec(1)).version("b").size(0).build();

    final RetrieveUsedSegmentsAction barAction = new RetrieveUsedSegmentsAction("bar", Intervals.of("2002/P1D"), null, Segments.ONLY_VISIBLE);
    final RetrieveUsedSegmentsAction fooAction = new RetrieveUsedSegmentsAction("foo", null, ImmutableList.of(Intervals.of("2000/P1D"), Intervals.of("2001/P1D")), Segments.ONLY_VISIBLE);

    final TaskActionClient taskActionClient = EasyMock.createMock(TaskActionClient.class);
    EasyMock.expect(taskActionClient.submit(barAction)).andReturn(ImmutableList.of(barPartition0, barPartition1));
    EasyMock.expect(taskActionClient.submit(fooAction)).andReturn(ImmutableList.of(foo2000Partition0, foo2000Partition1, foo2001Partition1, foo2002Partition1));
    EasyMock.replay(taskActionClient);

    final SegmentIdWithShardSpec wantedFoo2000 = new SegmentIdWithShardSpec("foo", Intervals.of("2000/P1D"), "a", new LinearShardSpec(1));
    final SegmentIdWithShardSpec wantedFoo2001 = new SegmentIdWithShardSpec("foo", Intervals.of("2001/P1D"), "b", new LinearShardSpec(0));
    final SegmentIdWithShardSpec wantedBar2002 = new SegmentIdWithShardSpec("bar", Intervals.of("2002/P1D"), "b", new LinearShardSpec(0));

    final UsedSegmentChecker checker = new ActionBasedUsedSegmentChecker(taskActionClient);
    final Set<DataSegment> segments = checker.findUsedSegments(ImmutableSet.of(wantedFoo2000, wantedFoo2001, wantedBar2002));

    // Expected values are built fresh so the comparison goes through DataSegment equality, not identity.
    final DataSegment expectedFoo = DataSegment.builder().dataSource("foo").interval(Intervals.of("2000/P1D")).shardSpec(new LinearShardSpec(1)).version("a").size(0).build();
    final DataSegment expectedBar = DataSegment.builder().dataSource("bar").interval(Intervals.of("2002/P1D")).shardSpec(new LinearShardSpec(0)).version("b").size(0).build();
    Assert.assertEquals(ImmutableSet.of(expectedFoo, expectedBar), segments);

    EasyMock.verify(taskActionClient);
}
Also used : TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) RetrieveUsedSegmentsAction(org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction) UsedSegmentChecker(org.apache.druid.segment.realtime.appenderator.UsedSegmentChecker) DataSegment(org.apache.druid.timeline.DataSegment) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) Test(org.junit.Test)

Example 3 with RetrieveUsedSegmentsAction

use of org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction in project druid by druid-io.

the class AbstractBatchIndexTask method findInputSegments.

/**
 * Finds the segments this task should lock as its input.
 * <p>
 * If the given firehoseFactory is an {@link IngestSegmentFirehoseFactory}, the segments to lock are taken
 * from the firehoseFactory. This is because those segments will be read by this task no matter what segments
 * would be filtered by intervalsToRead, so they need to be locked.
 * <p>
 * However, if firehoseFactory is not an IngestSegmentFirehoseFactory, it means this task will overwrite some
 * segments with data read from some input source outside of Druid. As a result, only the segments falling in
 * intervalsToRead should be locked.
 * <p>
 * The order of segments within the returned list is unspecified, but each segment is guaranteed to appear in the list
 * only once.
 *
 * @param dataSource      datasource whose used segments are looked up
 * @param actionClient    client used to submit the {@link RetrieveUsedSegmentsAction}
 * @param intervalsToRead intervals to look up; ignored when firehoseFactory is an IngestSegmentFirehoseFactory
 * @param firehoseFactory firehose factory of the task, used to decide which segments are inputs
 * @return the visible used segments to lock
 * @throws IOException          if submitting the retrieve action fails
 * @throws NullPointerException if the IngestSegmentFirehoseFactory has neither segments nor an interval
 */
protected static List<DataSegment> findInputSegments(String dataSource, TaskActionClient actionClient, List<Interval> intervalsToRead, FirehoseFactory firehoseFactory) throws IOException {
    if (!(firehoseFactory instanceof IngestSegmentFirehoseFactory)) {
        return ImmutableList.copyOf(actionClient.submit(new RetrieveUsedSegmentsAction(dataSource, null, intervalsToRead, Segments.ONLY_VISIBLE)));
    }

    // intervalsToRead is ignored from here on.
    final IngestSegmentFirehoseFactory ingestSegmentFactory = (IngestSegmentFirehoseFactory) firehoseFactory;
    final List<WindowedSegmentId> inputSegments = ingestSegmentFactory.getSegments();
    if (inputSegments == null) {
        // No explicit segment list: fall back to everything visible in the factory's interval.
        final Interval inputInterval = Preconditions.checkNotNull(ingestSegmentFactory.getInterval(), "input interval");
        return ImmutableList.copyOf(actionClient.submit(new RetrieveUsedSegmentsAction(dataSource, inputInterval, null, Segments.ONLY_VISIBLE)));
    }

    // A Set gives constant-time membership checks in the filter below, instead of a linear scan per segment.
    final Set<String> inputSegmentIds = inputSegments.stream().map(WindowedSegmentId::getSegmentId).collect(Collectors.toSet());
    final Collection<DataSegment> dataSegmentsInIntervals = actionClient.submit(new RetrieveUsedSegmentsAction(dataSource, null, inputSegments.stream().flatMap(windowedSegmentId -> windowedSegmentId.getIntervals().stream()).collect(Collectors.toSet()), Segments.ONLY_VISIBLE));
    // The lookup is by interval, so drop any segment in those intervals that was not explicitly requested.
    return dataSegmentsInIntervals.stream().filter(segment -> inputSegmentIds.contains(segment.getId().toString())).collect(Collectors.toList());
}
Also used : IngestSegmentFirehoseFactory(org.apache.druid.indexing.firehose.IngestSegmentFirehoseFactory) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) IndexIOConfig(org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig) Partitions(org.apache.druid.timeline.Partitions) CompactionState(org.apache.druid.timeline.CompactionState) JodaUtils(org.apache.druid.java.util.common.JodaUtils) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) Optional(com.google.common.base.Optional) Map(java.util.Map) InputSourceReader(org.apache.druid.data.input.InputSourceReader) TaskLock(org.apache.druid.indexing.common.TaskLock) TypeReference(com.fasterxml.jackson.core.type.TypeReference) Execs(org.apache.druid.java.util.common.concurrent.Execs) InputRowSchemas(org.apache.druid.indexing.input.InputRowSchemas) Predicate(java.util.function.Predicate) GuardedBy(com.google.errorprone.annotations.concurrent.GuardedBy) InputFormat(org.apache.druid.data.input.InputFormat) Collection(java.util.Collection) Segments(org.apache.druid.indexing.overlord.Segments) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) IngestionSpec(org.apache.druid.segment.indexing.IngestionSpec) Collectors(java.util.stream.Collectors) LockGranularity(org.apache.druid.indexing.common.LockGranularity) MaxAllowedLocksExceededException(org.apache.druid.indexing.common.task.batch.MaxAllowedLocksExceededException) IngestSegmentFirehoseFactory(org.apache.druid.indexing.firehose.IngestSegmentFirehoseFactory) InputRow(org.apache.druid.data.input.InputRow) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) TaskLockType(org.apache.druid.indexing.common.TaskLockType) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) DataSegment(org.apache.druid.timeline.DataSegment) 
TransformSpec(org.apache.druid.segment.transform.TransformSpec) Logger(org.apache.druid.java.util.common.logger.Logger) Granularity(org.apache.druid.java.util.common.granularity.Granularity) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) IntervalsByGranularity(org.apache.druid.java.util.common.granularity.IntervalsByGranularity) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) TimeChunkLockTryAcquireAction(org.apache.druid.indexing.common.actions.TimeChunkLockTryAcquireAction) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) Function(java.util.function.Function) TaskStatus(org.apache.druid.indexer.TaskStatus) TuningConfig(org.apache.druid.segment.indexing.TuningConfig) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) InputSource(org.apache.druid.data.input.InputSource) Interval(org.joda.time.Interval) ImmutableList(com.google.common.collect.ImmutableList) WindowedSegmentId(org.apache.druid.indexing.firehose.WindowedSegmentId) BiConsumer(java.util.function.BiConsumer) SegmentHandoffNotifier(org.apache.druid.segment.handoff.SegmentHandoffNotifier) Nullable(javax.annotation.Nullable) ExecutorService(java.util.concurrent.ExecutorService) ClientCompactionTaskTransformSpec(org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec) Period(org.joda.time.Period) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) Iterator(java.util.Iterator) ServiceMetricEvent(org.apache.druid.java.util.emitter.service.ServiceMetricEvent) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) IOException(java.io.IOException) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) GranularityType(org.apache.druid.java.util.common.granularity.GranularityType) 
Preconditions(com.google.common.base.Preconditions) RetrieveUsedSegmentsAction(org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) DataSchema(org.apache.druid.segment.indexing.DataSchema) Collections(java.util.Collections) RetrieveUsedSegmentsAction(org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction) WindowedSegmentId(org.apache.druid.indexing.firehose.WindowedSegmentId) DataSegment(org.apache.druid.timeline.DataSegment) Interval(org.joda.time.Interval)

Example 4 with RetrieveUsedSegmentsAction

use of org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction in project druid by druid-io.

the class ActionBasedUsedSegmentChecker method findUsedSegments.

/**
 * Returns the visible used segments whose ids match one of the given identifiers.
 * <p>
 * One {@link RetrieveUsedSegmentsAction} is submitted per datasource, covering the condensed intervals of
 * the requested ids; the result is then narrowed to the exact ids requested.
 *
 * @param segmentIds identifiers of the segments to check
 * @return the matching used segments, in a mutable set
 * @throws IOException if submitting a retrieve action fails
 */
@Override
public Set<DataSegment> findUsedSegments(Set<SegmentIdWithShardSpec> segmentIds) throws IOException {
    // Bucket the requested ids per datasource; the TreeMap makes datasources iterate in sorted order.
    final Map<String, Set<SegmentId>> requestedByDataSource = new TreeMap<>();
    for (SegmentIdWithShardSpec requested : segmentIds) {
        final String dataSourceName = requested.getDataSource();
        Set<SegmentId> bucket = requestedByDataSource.get(dataSourceName);
        if (bucket == null) {
            bucket = new HashSet<>();
            requestedByDataSource.put(dataSourceName, bucket);
        }
        bucket.add(requested.asSegmentId());
    }

    final Set<DataSegment> matches = new HashSet<>();
    for (Map.Entry<String, Set<SegmentId>> group : requestedByDataSource.entrySet()) {
        final Set<SegmentId> requestedIds = group.getValue();
        final RetrieveUsedSegmentsAction retrieveAction = new RetrieveUsedSegmentsAction(
            group.getKey(),
            null,
            JodaUtils.condenseIntervals(Iterables.transform(requestedIds, SegmentId::getInterval)),
            Segments.ONLY_VISIBLE
        );
        // The action is interval-based, so filter its result down to the exact ids requested.
        taskActionClient.submit(retrieveAction)
                        .stream()
                        .filter(candidate -> requestedIds.contains(candidate.getId()))
                        .forEach(matches::add);
    }
    return matches;
}
Also used : Iterables(com.google.common.collect.Iterables) Collection(java.util.Collection) Segments(org.apache.druid.indexing.overlord.Segments) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) Set(java.util.Set) IOException(java.io.IOException) UsedSegmentChecker(org.apache.druid.segment.realtime.appenderator.UsedSegmentChecker) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) List(java.util.List) JodaUtils(org.apache.druid.java.util.common.JodaUtils) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) TreeMap(java.util.TreeMap) Map(java.util.Map) DataSegment(org.apache.druid.timeline.DataSegment) RetrieveUsedSegmentsAction(org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction) SegmentId(org.apache.druid.timeline.SegmentId) Set(java.util.Set) HashSet(java.util.HashSet) SegmentId(org.apache.druid.timeline.SegmentId) TreeMap(java.util.TreeMap) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) DataSegment(org.apache.druid.timeline.DataSegment) RetrieveUsedSegmentsAction(org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction) TreeMap(java.util.TreeMap) Map(java.util.Map) HashSet(java.util.HashSet) Interval(org.joda.time.Interval)

Aggregations

RetrieveUsedSegmentsAction (org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction): 4; DataSegment (org.apache.druid.timeline.DataSegment): 4; HashSet (java.util.HashSet): 3; TaskActionClient (org.apache.druid.indexing.common.actions.TaskActionClient): 3; Interval (org.joda.time.Interval): 3; IOException (java.io.IOException): 2; Collection (java.util.Collection): 2; List (java.util.List): 2; Map (java.util.Map): 2; Set (java.util.Set): 2; Segments (org.apache.druid.indexing.overlord.Segments): 2; JodaUtils (org.apache.druid.java.util.common.JodaUtils): 2; TypeReference (com.fasterxml.jackson.core.type.TypeReference): 1; Optional (com.google.common.base.Optional): 1; Preconditions (com.google.common.base.Preconditions): 1; ImmutableList (com.google.common.collect.ImmutableList): 1; Iterables (com.google.common.collect.Iterables): 1; GuardedBy (com.google.errorprone.annotations.concurrent.GuardedBy): 1; File (java.io.File): 1; ArrayList (java.util.ArrayList): 1