Use of org.apache.druid.indexing.firehose.IngestSegmentFirehoseFactory in project druid by druid-io.
In class CompactionTaskRunTest, method testRunRegularIndexTaskWithIngestSegmentFirehose:
/**
* Run a regular index task that's equivalent to the compaction task in {@link #testRunWithDynamicPartitioning()},
* using {@link IngestSegmentFirehoseFactory}.
*
* This is not entirely CompactionTask-related, but it is conceptually similar and requires
* setup similar to what this test suite already has.
*
* It could be moved to a separate test class if needed.
*/
@Test
public void testRunRegularIndexTaskWithIngestSegmentFirehose() throws Exception
{
  runIndexTask();

  IndexTask indexTask = new IndexTask(
      null,
      null,
      new IndexTask.IndexIngestionSpec(
          new DataSchema(
              "test",
              getObjectMapper().convertValue(
                  new StringInputRowParser(DEFAULT_PARSE_SPEC, null),
                  Map.class
              ),
              new AggregatorFactory[]{new LongSumAggregatorFactory("val", "val")},
              new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null),
              null,
              getObjectMapper()
          ),
          new IndexTask.IndexIOConfig(
              // Read back the segments written by runIndexTask() above.
              new IngestSegmentFirehoseFactory(
                  DATA_SOURCE,
                  Intervals.of("2014-01-01/2014-01-02"),
                  null,
                  null,
                  null,
                  null,
                  null,
                  getIndexIO(),
                  coordinatorClient,
                  segmentCacheManagerFactory,
                  RETRY_POLICY_FACTORY
              ),
              false,
              false
          ),
          IndexTaskTest.createTuningConfig(5000000, null, null, Long.MAX_VALUE, null, false, true)
      ),
      null
  );

  // This is a regular index task, so we need to explicitly add this context to store the CompactionState.
  indexTask.addToContext(Tasks.STORE_COMPACTION_STATE_KEY, true);

  final Pair<TaskStatus, List<DataSegment>> resultPair = runTask(indexTask);
  Assert.assertTrue(resultPair.lhs.isSuccess());

  final List<DataSegment> segments = resultPair.rhs;
  Assert.assertEquals(3, segments.size());

  for (int i = 0; i < 3; i++) {
    Assert.assertEquals(
        Intervals.of("2014-01-01T0%d:00:00/2014-01-01T0%d:00:00", i, i + 1),
        segments.get(i).getInterval()
    );
    Assert.assertEquals(
        getDefaultCompactionState(Granularities.HOUR, Granularities.MINUTE, ImmutableList.of()),
        segments.get(i).getLastCompactionState()
    );
    if (lockGranularity == LockGranularity.SEGMENT) {
      Assert.assertEquals(
          new NumberedOverwriteShardSpec(32768, 0, 2, (short) 1, (short) 1),
          segments.get(i).getShardSpec()
      );
    } else {
      Assert.assertEquals(new NumberedShardSpec(0, 1), segments.get(i).getShardSpec());
    }
  }
}
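For context, an index task drains a firehose factory like the one above roughly as follows. This is a minimal sketch, not code from the test: the variables factory, parser, and tempDir are assumed to be in scope, and it uses the (since-deprecated) Firehose API of this Druid era.

// Minimal sketch, assuming `factory`, `parser`, and `tempDir` exist in scope;
// this is not part of the test above. Firehose is Closeable, so try-with-resources works.
try (Firehose firehose = factory.connect(parser, tempDir)) {
  while (firehose.hasMore()) {
    final InputRow row = firehose.nextRow(); // a row re-read from the existing segments
    // ... the task would hand `row` to its segment generator here ...
  }
}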
Use of org.apache.druid.indexing.firehose.IngestSegmentFirehoseFactory in project druid by druid-io.
In class AbstractBatchIndexTask, method findInputSegments:
/**
* If the given firehoseFactory is an {@link IngestSegmentFirehoseFactory}, this method finds the segments to lock
* from the firehoseFactory itself. Those segments will be read by this task regardless of intervalsToRead,
* so they must be locked.
* <p>
* However, if the given firehoseFactory is not an IngestSegmentFirehoseFactory, this task will overwrite some
* segments with data read from an input source outside of Druid. As a result, only the segments falling in
* intervalsToRead should be locked.
* <p>
* The order of segments within the returned list is unspecified, but each segment is guaranteed to appear in the list
* only once.
*/
protected static List<DataSegment> findInputSegments(
    String dataSource,
    TaskActionClient actionClient,
    List<Interval> intervalsToRead,
    FirehoseFactory firehoseFactory
) throws IOException
{
  if (firehoseFactory instanceof IngestSegmentFirehoseFactory) {
    // intervalsToRead is ignored here.
    final List<WindowedSegmentId> inputSegments = ((IngestSegmentFirehoseFactory) firehoseFactory).getSegments();
    if (inputSegments == null) {
      // No explicit segment ids: lock every visible segment in the firehose's input interval.
      final Interval inputInterval = Preconditions.checkNotNull(
          ((IngestSegmentFirehoseFactory) firehoseFactory).getInterval(),
          "input interval"
      );
      return ImmutableList.copyOf(
          actionClient.submit(new RetrieveUsedSegmentsAction(dataSource, inputInterval, null, Segments.ONLY_VISIBLE))
      );
    } else {
      // Explicit segment ids: retrieve the visible segments overlapping their intervals,
      // then keep only the segments whose ids were actually requested.
      final List<String> inputSegmentIds =
          inputSegments.stream().map(WindowedSegmentId::getSegmentId).collect(Collectors.toList());
      final Collection<DataSegment> dataSegmentsInIntervals = actionClient.submit(
          new RetrieveUsedSegmentsAction(
              dataSource,
              null,
              inputSegments.stream()
                           .flatMap(windowedSegmentId -> windowedSegmentId.getIntervals().stream())
                           .collect(Collectors.toSet()),
              Segments.ONLY_VISIBLE
          )
      );
      return dataSegmentsInIntervals.stream()
                                    .filter(segment -> inputSegmentIds.contains(segment.getId().toString()))
                                    .collect(Collectors.toList());
    }
  } else {
    return ImmutableList.copyOf(
        actionClient.submit(new RetrieveUsedSegmentsAction(dataSource, null, intervalsToRead, Segments.ONLY_VISIBLE))
    );
  }
}
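Callers typically derive their lock requests from the returned list. A minimal sketch, assuming dataSource, actionClient, intervalsToRead, and firehoseFactory are in scope; the intervalsToLock variable is illustrative, not Druid API:

// Sketch: collapse the found input segments into the distinct intervals to lock.
final List<DataSegment> inputSegments =
    findInputSegments(dataSource, actionClient, intervalsToRead, firehoseFactory);
final Set<Interval> intervalsToLock = inputSegments.stream()
    .map(DataSegment::getInterval)
    .collect(Collectors.toSet());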