Use of io.druid.segment.realtime.firehose.ReplayableFirehoseFactory in the druid project by druid-io.
From the class IndexTask, method run.
/**
 * Runs the index task: picks the firehose factory to read from, determines the shard specs,
 * resolves the segment version and data schema, then generates and publishes segments.
 *
 * <p>When the granularity spec carries no bucket intervals, a lock is acquired over the umbrella
 * of the intervals found by {@code determineShardSpecs} and the schema is narrowed to the
 * condensed form of those intervals. Otherwise the version comes from the single task lock
 * returned by {@code getTaskLocks}.
 *
 * @param toolbox toolbox used for task actions; also handed to an
 *                {@link IngestSegmentFirehoseFactory} delegate when one is configured
 * @return a success status when {@code generateAndPublishSegments} returns true, a failure status
 *         otherwise
 * @throws Exception propagated from any of the steps above
 */
@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
  final boolean intervalsMissing =
      !ingestionSchema.getDataSchema().getGranularitySpec().bucketIntervals().isPresent();

  final FirehoseFactory configuredFactory = ingestionSchema.getIOConfig().getFirehoseFactory();
  if (configuredFactory instanceof IngestSegmentFirehoseFactory) {
    // This factory type needs the toolbox before it is used.
    final IngestSegmentFirehoseFactory segmentFactory = (IngestSegmentFirehoseFactory) configuredFactory;
    segmentFactory.setTaskToolbox(toolbox);
  }

  // Wrap the configured factory in a replayable one unless caching is skipped or it already is one.
  final FirehoseFactory firehoseFactory;
  if (!ingestionSchema.getIOConfig().isSkipFirehoseCaching()
      && !(configuredFactory instanceof ReplayableFirehoseFactory)) {
    firehoseFactory = new ReplayableFirehoseFactory(
        configuredFactory,
        ingestionSchema.getTuningConfig().isReportParseExceptions(),
        null,
        null,
        smileMapper
    );
  } else {
    firehoseFactory = configuredFactory;
  }

  final Map<Interval, List<ShardSpec>> shardSpecs = determineShardSpecs(toolbox, firehoseFactory);

  final String version;
  final DataSchema dataSchema;
  if (!intervalsMissing) {
    // Intervals were given up front: exactly one existing task lock is expected.
    version = Iterables.getOnlyElement(getTaskLocks(toolbox)).getVersion();
    dataSchema = ingestionSchema.getDataSchema();
  } else {
    // Intervals were derived from the data: lock the interval spanning all of them.
    final Interval umbrella = JodaUtils.umbrellaInterval(shardSpecs.keySet());
    final TaskLock acquired = toolbox.getTaskActionClient().submit(new LockAcquireAction(umbrella));
    version = acquired.getVersion();
    final DataSchema configuredSchema = ingestionSchema.getDataSchema();
    dataSchema = configuredSchema.withGranularitySpec(
        configuredSchema.getGranularitySpec().withIntervals(JodaUtils.condenseIntervals(shardSpecs.keySet()))
    );
  }

  final boolean published = generateAndPublishSegments(toolbox, dataSchema, shardSpecs, version, firehoseFactory);
  return published ? TaskStatus.success(getId()) : TaskStatus.failure(getId());
}
Use of io.druid.segment.realtime.firehose.ReplayableFirehoseFactory in the druid project by druid-io.
From the class ReplayableFirehoseFactoryTest, method testReplayableFirehoseWithNoRetries.
/**
 * Expects a {@link TestReadingException} thrown by the delegate's {@code nextRow()} to surface
 * from the test when the factory is built with 0 as its fourth argument (presumably the retry
 * count, going by the test name; confirm against the constructor).
 */
@Test(expected = TestReadingException.class)
public void testReplayableFirehoseWithNoRetries() throws Exception {
  replayableFirehoseFactory = new ReplayableFirehoseFactory(delegateFactory, false, 10000, 0, mapper);

  // Script the delegate: it connects, reports more rows twice, and fails on the read.
  final TestReadingException readFailure = new TestReadingException();
  expect(delegateFactory.connect(parser)).andReturn(delegateFirehose);
  expect(delegateFirehose.hasMore()).andReturn(true).times(2);
  expect(delegateFirehose.nextRow()).andThrow(readFailure);

  // Record the void close() call as an expectation.
  delegateFirehose.close();
  expectLastCall();

  replayAll();

  // NOTE(review): the test passes only if an exception escapes, so verifyAll() below is
  // presumably never reached in the passing case; confirm this is intended.
  replayableFirehoseFactory.connect(parser);
  verifyAll();
}
Use of io.druid.segment.realtime.firehose.ReplayableFirehoseFactory in the druid project by druid-io.
From the class ReplayableFirehoseFactoryTest, method testReplayableFirehoseWithMultipleFiles.
/**
 * Feeds 500 copies of each of the first three test rows plus one final row through the
 * replayable factory, then connects a second time and checks that the same rows come back.
 * The delegate factory is scripted for a single {@code connect}, while the replayable factory is
 * connected twice.
 *
 * <p>The third constructor argument is 1 here (presumably a tiny per-file size limit that forces
 * several files, going by the test name; confirm against the constructor).
 */
@Test
public void testReplayableFirehoseWithMultipleFiles() throws Exception {
  final int copiesPerRow = 500;
  // Single-element array so the anonymous answers below can flip the flag.
  final boolean[] delegateHasMore = { true };
  final InputRow lastRow = new MapBasedInputRow(
      DateTime.now(),
      Lists.newArrayList("dim4", "dim5"),
      ImmutableMap.<String, Object>of("dim4", "val12", "dim5", "val20", "met1", 30)
  );

  // hasMore() mirrors the flag for as long as it is asked.
  final IAnswer<Boolean> hasMoreAnswer = new IAnswer<Boolean>() {
    @Override
    public Boolean answer() throws Throwable {
      return delegateHasMore[0];
    }
  };
  // Handing out the last row also marks the delegate as exhausted.
  final IAnswer<InputRow> lastRowAnswer = new IAnswer<InputRow>() {
    @Override
    public InputRow answer() throws Throwable {
      delegateHasMore[0] = false;
      return lastRow;
    }
  };

  replayableFirehoseFactory = new ReplayableFirehoseFactory(delegateFactory, false, 1, 3, mapper);

  expect(delegateFactory.connect(parser)).andReturn(delegateFirehose);
  expect(delegateFirehose.hasMore()).andAnswer(hasMoreAnswer).anyTimes();
  expect(delegateFirehose.nextRow())
      .andReturn(testRows.get(0)).times(copiesPerRow)
      .andReturn(testRows.get(1)).times(copiesPerRow)
      .andReturn(testRows.get(2)).times(copiesPerRow)
      .andAnswer(lastRowAnswer);
  delegateFirehose.close();
  replayAll();

  // Expected output: every test row repeated copiesPerRow times, followed by the last row.
  final List<InputRow> expectedRows = Lists.newArrayList();
  for (InputRow testRow : testRows) {
    int remaining = copiesPerRow;
    while (remaining > 0) {
      expectedRows.add(testRow);
      remaining--;
    }
  }
  expectedRows.add(lastRow);

  // First pass.
  final List<InputRow> firstPassRows = Lists.newArrayList();
  try (Firehose firstPass = replayableFirehoseFactory.connect(parser)) {
    while (firstPass.hasMore()) {
      firstPassRows.add(firstPass.nextRow());
    }
  }
  Assert.assertEquals(expectedRows, firstPassRows);

  // Second pass: connect again and expect the same rows.
  final List<InputRow> replayedRows = Lists.newArrayList();
  try (Firehose replay = replayableFirehoseFactory.connect(parser)) {
    while (replay.hasMore()) {
      replayedRows.add(replay.nextRow());
    }
  }
  Assert.assertEquals(expectedRows, replayedRows);

  verifyAll();
}
Use of io.druid.segment.realtime.firehose.ReplayableFirehoseFactory in the druid project by druid-io.
From the class ReplayableFirehoseFactoryTest, method testReplayableFirehoseWithoutReportParseExceptions.
/**
 * Builds the factory with {@code false} as its second argument (presumably the
 * report-parse-exceptions flag, going by the test name; confirm against the constructor) and
 * scripts the delegate to throw a {@link ParseException} between the second and third rows.
 * The rows read back must equal {@code testRows}, so the parse failure must neither abort the
 * read nor add a row.
 */
@Test
public void testReplayableFirehoseWithoutReportParseExceptions() throws Exception {
  // Single-element array so the anonymous answers below can flip the flag.
  final boolean[] delegateHasMore = { true };

  // hasMore() mirrors the flag for as long as it is asked.
  final IAnswer<Boolean> hasMoreAnswer = new IAnswer<Boolean>() {
    @Override
    public Boolean answer() throws Throwable {
      return delegateHasMore[0];
    }
  };
  // Handing out the third row also marks the delegate as exhausted.
  final IAnswer<InputRow> thirdRowAnswer = new IAnswer<InputRow>() {
    @Override
    public InputRow answer() throws Throwable {
      delegateHasMore[0] = false;
      return testRows.get(2);
    }
  };

  replayableFirehoseFactory = new ReplayableFirehoseFactory(delegateFactory, false, 10000, 3, mapper);

  expect(delegateFactory.connect(parser)).andReturn(delegateFirehose);
  expect(delegateFirehose.hasMore()).andAnswer(hasMoreAnswer).anyTimes();
  expect(delegateFirehose.nextRow())
      .andReturn(testRows.get(0))
      .andReturn(testRows.get(1))
      .andThrow(new ParseException("unparseable!"))
      .andAnswer(thirdRowAnswer);
  delegateFirehose.close();
  replayAll();

  final List<InputRow> readRows = Lists.newArrayList();
  try (Firehose replayable = replayableFirehoseFactory.connect(parser)) {
    while (replayable.hasMore()) {
      readRows.add(replayable.nextRow());
    }
  }

  Assert.assertEquals(testRows, readRows);
  verifyAll();
}
Aggregations