use of org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction in project druid by druid-io.
the class AbstractBatchIndexTask method getUsedSegmentsWithinInterval.
/**
 * Retrieves the used segments of {@code dataSource} (requested with {@link Segments#ONLY_VISIBLE}) for the
 * condensed form of {@code intervals}, and keeps only those whose own interval is contained in one of the
 * condensed intervals.
 *
 * @param toolbox    toolbox whose task action client is used to submit the retrieval action
 * @param dataSource data source to query
 * @param intervals  intervals of interest; when empty no action is submitted
 * @return a mutable set of the matching segments; empty when {@code intervals} is empty
 * @throws IOException if submitting the task action fails
 */
public static Set<DataSegment> getUsedSegmentsWithinInterval(TaskToolbox toolbox, String dataSource, List<Interval> intervals) throws IOException {
  final Set<DataSegment> containedSegments = new HashSet<>();
  final List<Interval> mergedIntervals = JodaUtils.condenseIntervals(intervals);
  if (intervals.isEmpty()) {
    // Nothing was requested, so there is nothing to look up.
    return containedSegments;
  }

  final Collection<DataSegment> retrieved = toolbox.getTaskActionClient().submit(new RetrieveUsedSegmentsAction(dataSource, null, mergedIntervals, Segments.ONLY_VISIBLE));
  for (DataSegment candidate : retrieved) {
    final Interval candidateInterval = candidate.getInterval();
    // The retrieval may hand back segments that are not contained in any requested interval;
    // only segments contained in at least one condensed interval are kept.
    if (mergedIntervals.stream().anyMatch(merged -> merged.contains(candidateInterval))) {
      containedSegments.add(candidate);
    }
  }
  return containedSegments;
}
use of org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction in project druid by druid-io.
the class ActionBasedUsedSegmentCheckerTest method testBasic.
/**
 * Verifies that the checker returns only those used segments whose ids were actually requested:
 * the mocked client reports more segments than were asked for, and one requested id
 * ("foo", 2001, partition 0) is not reported at all.
 */
@Test
public void testBasic() throws IOException {
  // Segments the mocked action client reports as used for data source "bar".
  final ImmutableList<DataSegment> reportedForBar = ImmutableList.of(
      DataSegment.builder().dataSource("bar").interval(Intervals.of("2002/P1D")).shardSpec(new LinearShardSpec(0)).version("b").size(0).build(),
      DataSegment.builder().dataSource("bar").interval(Intervals.of("2002/P1D")).shardSpec(new LinearShardSpec(1)).version("b").size(0).build()
  );
  // Segments the mocked action client reports as used for data source "foo".
  final ImmutableList<DataSegment> reportedForFoo = ImmutableList.of(
      DataSegment.builder().dataSource("foo").interval(Intervals.of("2000/P1D")).shardSpec(new LinearShardSpec(0)).version("a").size(0).build(),
      DataSegment.builder().dataSource("foo").interval(Intervals.of("2000/P1D")).shardSpec(new LinearShardSpec(1)).version("a").size(0).build(),
      DataSegment.builder().dataSource("foo").interval(Intervals.of("2001/P1D")).shardSpec(new LinearShardSpec(1)).version("b").size(0).build(),
      DataSegment.builder().dataSource("foo").interval(Intervals.of("2002/P1D")).shardSpec(new LinearShardSpec(1)).version("b").size(0).build()
  );

  final TaskActionClient taskActionClient = EasyMock.createMock(TaskActionClient.class);
  EasyMock.expect(taskActionClient.submit(new RetrieveUsedSegmentsAction("bar", Intervals.of("2002/P1D"), null, Segments.ONLY_VISIBLE)))
          .andReturn(reportedForBar);
  EasyMock.expect(taskActionClient.submit(new RetrieveUsedSegmentsAction("foo", null, ImmutableList.of(Intervals.of("2000/P1D"), Intervals.of("2001/P1D")), Segments.ONLY_VISIBLE)))
          .andReturn(reportedForFoo);
  EasyMock.replay(taskActionClient);

  // Ids the checker is asked about.
  final Set<SegmentIdWithShardSpec> requestedIds = ImmutableSet.of(
      new SegmentIdWithShardSpec("foo", Intervals.of("2000/P1D"), "a", new LinearShardSpec(1)),
      new SegmentIdWithShardSpec("foo", Intervals.of("2001/P1D"), "b", new LinearShardSpec(0)),
      new SegmentIdWithShardSpec("bar", Intervals.of("2002/P1D"), "b", new LinearShardSpec(0))
  );

  final UsedSegmentChecker checker = new ActionBasedUsedSegmentChecker(taskActionClient);
  final Set<DataSegment> segments = checker.findUsedSegments(requestedIds);

  // Expected segments are built independently of the mocked return values.
  final Set<DataSegment> expectedSegments = ImmutableSet.of(
      DataSegment.builder().dataSource("foo").interval(Intervals.of("2000/P1D")).shardSpec(new LinearShardSpec(1)).version("a").size(0).build(),
      DataSegment.builder().dataSource("bar").interval(Intervals.of("2002/P1D")).shardSpec(new LinearShardSpec(0)).version("b").size(0).build()
  );
  Assert.assertEquals(expectedSegments, segments);
  EasyMock.verify(taskActionClient);
}
use of org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction in project druid by druid-io.
the class AbstractBatchIndexTask method findInputSegments.
/**
 * If the given firehoseFactory is {@link IngestSegmentFirehoseFactory}, then it finds the segments to lock
 * from the firehoseFactory. This is because those segments will be read by this task no matter what segments would be
 * filtered by intervalsToRead, so they need to be locked.
 * <p>
 * However, if firehoseFactory is not IngestSegmentFirehoseFactory, it means this task will overwrite some segments
 * with data read from some input source outside of Druid. As a result, only the segments falling in intervalsToRead
 * should be locked.
 * <p>
 * The order of segments within the returned list is unspecified, but each segment is guaranteed to appear in the list
 * only once.
 *
 * @param dataSource      data source whose used segments are retrieved
 * @param actionClient    client used to submit the {@link RetrieveUsedSegmentsAction}
 * @param intervalsToRead intervals to retrieve segments for; ignored when firehoseFactory is an
 *                        {@link IngestSegmentFirehoseFactory}
 * @param firehoseFactory firehose factory of the task
 * @return the segments found for the applicable intervals or segment ids
 * @throws IOException          if submitting the task action fails
 * @throws NullPointerException if an {@link IngestSegmentFirehoseFactory} provides neither segments nor an interval
 */
protected static List<DataSegment> findInputSegments(String dataSource, TaskActionClient actionClient, List<Interval> intervalsToRead, FirehoseFactory firehoseFactory) throws IOException {
  if (!(firehoseFactory instanceof IngestSegmentFirehoseFactory)) {
    return ImmutableList.copyOf(actionClient.submit(new RetrieveUsedSegmentsAction(dataSource, null, intervalsToRead, Segments.ONLY_VISIBLE)));
  }

  // intervalsToRead is ignored here.
  final IngestSegmentFirehoseFactory ingestSegmentFirehoseFactory = (IngestSegmentFirehoseFactory) firehoseFactory;
  final List<WindowedSegmentId> inputSegments = ingestSegmentFirehoseFactory.getSegments();
  if (inputSegments == null) {
    // No explicit segment list: fall back to the factory's interval, which must then be present.
    final Interval inputInterval = Preconditions.checkNotNull(ingestSegmentFirehoseFactory.getInterval(), "input interval");
    return ImmutableList.copyOf(actionClient.submit(new RetrieveUsedSegmentsAction(dataSource, inputInterval, null, Segments.ONLY_VISIBLE)));
  }

  // Collected into a Set so that the membership test below is a hash lookup per retrieved segment
  // rather than a linear scan over all requested ids.
  final Set<String> inputSegmentIds = inputSegments.stream().map(WindowedSegmentId::getSegmentId).collect(Collectors.toSet());
  final Collection<DataSegment> dataSegmentsInIntervals = actionClient.submit(new RetrieveUsedSegmentsAction(dataSource, null, inputSegments.stream().flatMap(windowedSegmentId -> windowedSegmentId.getIntervals().stream()).collect(Collectors.toSet()), Segments.ONLY_VISIBLE));
  // The interval-based retrieval can return segments that were not explicitly listed; keep only the listed ids.
  return dataSegmentsInIntervals.stream().filter(segment -> inputSegmentIds.contains(segment.getId().toString())).collect(Collectors.toList());
}
use of org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction in project druid by druid-io.
the class ActionBasedUsedSegmentChecker method findUsedSegments.
/**
 * Finds which of the given segment ids correspond to used segments.
 * <p>
 * The ids are grouped per data source; for each data source one {@link RetrieveUsedSegmentsAction} is submitted
 * covering the condensed intervals of its ids, and the retrieved segments are narrowed down to those whose id
 * was requested.
 *
 * @param segmentIds ids of the segments to check
 * @return the retrieved segments whose id matches one of {@code segmentIds}
 * @throws IOException if submitting a task action fails
 */
@Override
public Set<DataSegment> findUsedSegments(Set<SegmentIdWithShardSpec> segmentIds) throws IOException {
  // Bucket the requested ids per data source; the TreeMap makes the per-data-source
  // actions below get submitted in sorted data source order.
  final Map<String, Set<SegmentId>> requestedByDataSource = new TreeMap<>();
  for (SegmentIdWithShardSpec requested : segmentIds) {
    final String owner = requested.getDataSource();
    Set<SegmentId> bucket = requestedByDataSource.get(owner);
    if (bucket == null) {
      bucket = new HashSet<>();
      requestedByDataSource.put(owner, bucket);
    }
    bucket.add(requested.asSegmentId());
  }

  final Set<DataSegment> matches = new HashSet<>();
  for (String dataSource : requestedByDataSource.keySet()) {
    final Set<SegmentId> wantedIds = requestedByDataSource.get(dataSource);
    final List<Interval> condensed = JodaUtils.condenseIntervals(Iterables.transform(wantedIds, SegmentId::getInterval));
    final Collection<DataSegment> retrieved = taskActionClient.submit(new RetrieveUsedSegmentsAction(dataSource, null, condensed, Segments.ONLY_VISIBLE));
    for (DataSegment candidate : retrieved) {
      // The interval query may return segments that were not asked about; skip those.
      if (!wantedIds.contains(candidate.getId())) {
        continue;
      }
      matches.add(candidate);
    }
  }
  return matches;
}
Aggregations