Use of org.apache.druid.indexing.firehose.WindowedSegmentId in project druid by druid-io.
The class DruidInputSource, method getTimelineForSegmentIds.
public static List<TimelineObjectHolder<String, DataSegment>> getTimelineForSegmentIds(
    CoordinatorClient coordinatorClient,
    String dataSource,
    List<WindowedSegmentId> segmentIds
)
{
  final SortedMap<Interval, TimelineObjectHolder<String, DataSegment>> timeline =
      new TreeMap<>(Comparators.intervalsByStartThenEnd());
  for (WindowedSegmentId windowedSegmentId : Preconditions.checkNotNull(segmentIds, "segmentIds")) {
    final DataSegment segment = coordinatorClient.fetchUsedSegment(dataSource, windowedSegmentId.getSegmentId());
    for (Interval interval : windowedSegmentId.getIntervals()) {
      final TimelineObjectHolder<String, DataSegment> existingHolder = timeline.get(interval);
      if (existingHolder != null) {
        if (!existingHolder.getVersion().equals(segment.getVersion())) {
          throw new ISE(
              "Timeline segments with the same interval should have the same version: "
              + "existing version[%s] vs new segment[%s]",
              existingHolder.getVersion(),
              segment
          );
        }
        existingHolder.getObject().add(segment.getShardSpec().createChunk(segment));
      } else {
        timeline.put(
            interval,
            new TimelineObjectHolder<>(
                interval,
                segment.getInterval(),
                segment.getVersion(),
                new PartitionHolder<>(segment.getShardSpec().createChunk(segment))
            )
        );
      }
    }
  }

  // Validate that none of the given windows overlaps (except for when multiple segments share exactly the
  // same interval).
  Interval lastInterval = null;
  for (Interval interval : timeline.keySet()) {
    if (lastInterval != null && interval.overlaps(lastInterval)) {
      throw new IAE("Distinct intervals in input segments may not overlap: [%s] vs [%s]", lastInterval, interval);
    }
    lastInterval = interval;
  }

  return new ArrayList<>(timeline.values());
}
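Two properties make the final validation pass sound: the TreeMap keys are sorted by start-then-end, so if any two distinct windows overlap, some adjacent pair in iteration order overlaps too; and exactly identical windows never reach the check because they collapse into a single map key. Below is a minimal standalone sketch of the same check, with made-up interval values; it reuses the Druid utility classes Comparators, Intervals, and IAE seen in the snippet above.

// Sketch of the overlap check in isolation; the interval values are illustrative only.
SortedSet<Interval> windows = new TreeSet<>(Comparators.intervalsByStartThenEnd());
windows.add(Intervals.of("2000-01-01/2000-01-03"));
windows.add(Intervals.of("2000-01-02/2000-01-04")); // overlaps the first window

Interval last = null;
for (Interval cur : windows) {
  if (last != null && cur.overlaps(last)) {
    // Mirrors the IAE thrown above: distinct windows may not overlap.
    throw new IAE("Distinct intervals in input segments may not overlap: [%s] vs [%s]", last, cur);
  }
  last = cur;
}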
Use of org.apache.druid.indexing.firehose.WindowedSegmentId in project druid by druid-io.
The class DruidInputSource, method createSplits.
public static Iterator<InputSplit<List<WindowedSegmentId>>> createSplits(
    CoordinatorClient coordinatorClient,
    RetryPolicyFactory retryPolicyFactory,
    String dataSource,
    Interval interval,
    SplitHintSpec splitHintSpec
)
{
  final SplitHintSpec convertedSplitHintSpec;
  if (splitHintSpec instanceof SegmentsSplitHintSpec) {
    final SegmentsSplitHintSpec segmentsSplitHintSpec = (SegmentsSplitHintSpec) splitHintSpec;
    convertedSplitHintSpec = new MaxSizeSplitHintSpec(
        segmentsSplitHintSpec.getMaxInputSegmentBytesPerTask(),
        segmentsSplitHintSpec.getMaxNumSegments()
    );
  } else {
    convertedSplitHintSpec = splitHintSpec;
  }

  final List<TimelineObjectHolder<String, DataSegment>> timelineSegments = getTimelineForInterval(
      coordinatorClient,
      retryPolicyFactory,
      dataSource,
      interval
  );
  final Map<WindowedSegmentId, Long> segmentIdToSize = createWindowedSegmentIdFromTimeline(timelineSegments);

  //noinspection ConstantConditions
  return Iterators.transform(
      convertedSplitHintSpec.split(
          // the same input split.
          segmentIdToSize.keySet().iterator(),
          segmentId -> new InputFileAttribute(
              Preconditions.checkNotNull(segmentIdToSize.get(segmentId), "segment size for [%s]", segmentId)
          )
      ),
      InputSplit::new
  );
}
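A hedged usage sketch of the method, not taken from the Druid source: the names coordinatorClient and retryPolicyFactory are assumed to be supplied by the surrounding task context, the datasource and interval are illustrative, and passing nulls to MaxSizeSplitHintSpec is assumed to select that hint spec's built-in defaults.

// Hedged sketch: plan splits for one datasource/interval with default size-based hints.
Iterator<InputSplit<List<WindowedSegmentId>>> splits = DruidInputSource.createSplits(
    coordinatorClient,          // assumed available from the caller's context
    retryPolicyFactory,         // assumed available from the caller's context
    "foo",
    Intervals.of("2000-01-01/2001-01-01"),
    new MaxSizeSplitHintSpec(null, null) // nulls assumed to fall back to the hint spec's defaults
);
while (splits.hasNext()) {
  // Each split becomes the "segments" list of one subtask's DruidInputSource.
  List<WindowedSegmentId> segmentsForOneTask = splits.next().get();
}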
Use of org.apache.druid.indexing.firehose.WindowedSegmentId in project druid by druid-io.
The class AbstractBatchIndexTask, method findInputSegments.
/**
 * If the given firehoseFactory is {@link IngestSegmentFirehoseFactory}, this method finds the segments to lock
 * from the firehoseFactory itself. This is because those segments will be read by this task no matter what
 * would be filtered by intervalsToRead, so they need to be locked.
 * <p>
 * However, if firehoseFactory is not an IngestSegmentFirehoseFactory, this task will overwrite some segments
 * with data read from an input source outside of Druid. As a result, only the segments falling in
 * intervalsToRead should be locked.
 * <p>
 * The order of segments within the returned list is unspecified, but each segment is guaranteed to appear in
 * the list only once.
 */
protected static List<DataSegment> findInputSegments(
    String dataSource,
    TaskActionClient actionClient,
    List<Interval> intervalsToRead,
    FirehoseFactory firehoseFactory
) throws IOException
{
  if (firehoseFactory instanceof IngestSegmentFirehoseFactory) {
    // intervalsToRead is ignored here.
    final List<WindowedSegmentId> inputSegments = ((IngestSegmentFirehoseFactory) firehoseFactory).getSegments();
    if (inputSegments == null) {
      final Interval inputInterval = Preconditions.checkNotNull(
          ((IngestSegmentFirehoseFactory) firehoseFactory).getInterval(),
          "input interval"
      );
      return ImmutableList.copyOf(
          actionClient.submit(new RetrieveUsedSegmentsAction(dataSource, inputInterval, null, Segments.ONLY_VISIBLE))
      );
    } else {
      final List<String> inputSegmentIds =
          inputSegments.stream().map(WindowedSegmentId::getSegmentId).collect(Collectors.toList());
      final Collection<DataSegment> dataSegmentsInIntervals = actionClient.submit(
          new RetrieveUsedSegmentsAction(
              dataSource,
              null,
              inputSegments.stream()
                           .flatMap(windowedSegmentId -> windowedSegmentId.getIntervals().stream())
                           .collect(Collectors.toSet()),
              Segments.ONLY_VISIBLE
          )
      );
      return dataSegmentsInIntervals.stream()
                                    .filter(segment -> inputSegmentIds.contains(segment.getId().toString()))
                                    .collect(Collectors.toList());
    }
  } else {
    return ImmutableList.copyOf(
        actionClient.submit(new RetrieveUsedSegmentsAction(dataSource, null, intervalsToRead, Segments.ONLY_VISIBLE))
    );
  }
}
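One detail worth noting in the explicit-segments branch: inputSegmentIds is a List, so the contains call inside the filter is linear per candidate segment. For large input segment lists the usual idiom is a set-based lookup. A behavior-equivalent sketch of just that filtering step:

// Sketch only: same filtering result with O(1) membership checks via a HashSet.
Set<String> inputSegmentIdSet = new HashSet<>(inputSegmentIds);
List<DataSegment> filtered = dataSegmentsInIntervals.stream()
    .filter(segment -> inputSegmentIdSet.contains(segment.getId().toString()))
    .collect(Collectors.toList());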
Use of org.apache.druid.indexing.firehose.WindowedSegmentId in project druid by druid-io.
The class DruidInputSourceTest, method testSerdeUsingSegments.
@Test
public void testSerdeUsingSegments() throws Exception
{
  final String json = "{"
                      + "\"type\":\"druid\","
                      + "\"dataSource\":\"foo\","
                      + "\"segments\":["
                      + "{\"segmentId\":\"foo_2000-01-01T00:00:00.000Z_2000-01-01T01:00:00.000Z_abc123\","
                      + "\"intervals\":[\"2000-01-01T00:00:00.000Z/2000-01-01T12:00:00.000Z\"]}"
                      + "]"
                      + "}";
  final InputSource inputSource = mapper.readValue(json, InputSource.class);

  Assert.assertThat(inputSource, CoreMatchers.instanceOf(DruidInputSource.class));
  Assert.assertEquals(
      new DruidInputSource(
          "foo",
          null,
          ImmutableList.of(
              new WindowedSegmentId(
                  "foo_2000-01-01T00:00:00.000Z_2000-01-01T01:00:00.000Z_abc123",
                  ImmutableList.of(Intervals.of("2000-01-01T00/2000-01-01T12"))
              )
          ),
          null,
          null,
          null,
          indexIO,
          coordinatorClient,
          segmentCacheManagerFactory,
          retryPolicyFactory,
          taskConfig
      ),
      inputSource
  );
  Assert.assertEquals(json, mapper.writeValueAsString(inputSource));
}
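To inspect just the WindowedSegmentId portion of that JSON in isolation, the object can be round-tripped directly. A minimal sketch, assuming Druid's DefaultObjectMapper in place of the test's injected mapper (it registers the Joda-Time serializers that render an Interval as a "start/end" string):

// Minimal serde sketch; DefaultObjectMapper is an assumption, not the test's mapper.
ObjectMapper objectMapper = new DefaultObjectMapper();
WindowedSegmentId segmentId = new WindowedSegmentId(
    "foo_2000-01-01T00:00:00.000Z_2000-01-01T01:00:00.000Z_abc123",
    ImmutableList.of(Intervals.of("2000-01-01T00/2000-01-01T12"))
);
String serialized = objectMapper.writeValueAsString(segmentId);
// Expected shape: {"segmentId":"foo_...","intervals":["2000-01-01T00:00:00.000Z/2000-01-01T12:00:00.000Z"]}
WindowedSegmentId roundTripped = objectMapper.readValue(serialized, WindowedSegmentId.class);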