Search in sources :

Example 1 with ReplayableFirehoseFactory

use of io.druid.segment.realtime.firehose.ReplayableFirehoseFactory in project druid by druid-io.

In the class IndexTask, the method run:

/**
 * Executes this task: picks the firehose factory to read from, determines shard specs,
 * resolves the segment version and data schema, then generates and publishes segments.
 *
 * <p>If the configured firehose factory is an {@link IngestSegmentFirehoseFactory}, the
 * toolbox is handed to it first. Unless firehose caching is skipped by the IO config or the
 * configured factory is already a {@link ReplayableFirehoseFactory}, the configured factory
 * is wrapped in a new {@link ReplayableFirehoseFactory}.
 *
 * <p>When the granularity spec carries no bucket intervals, a lock is acquired over the
 * umbrella interval of the determined shard specs and the schema is re-derived with the
 * condensed intervals; otherwise the version of the task's only existing lock is used and
 * the configured schema is kept as is.
 *
 * @param toolbox toolbox supplying the task action client and passed on to the helpers
 * @return a success status when {@code generateAndPublishSegments} returns {@code true},
 *         a failure status otherwise
 * @throws Exception propagated from any of the calls made here
 */
@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
    final boolean intervalsAreExplicit = ingestionSchema.getDataSchema().getGranularitySpec().bucketIntervals().isPresent();

    final FirehoseFactory configuredFactory = ingestionSchema.getIOConfig().getFirehoseFactory();
    if (configuredFactory instanceof IngestSegmentFirehoseFactory) {
        // pass toolbox to Firehose
        final IngestSegmentFirehoseFactory segmentFactory = (IngestSegmentFirehoseFactory) configuredFactory;
        segmentFactory.setTaskToolbox(toolbox);
    }

    // Wrap only when caching is not skipped and the factory is not already replayable.
    final FirehoseFactory firehoseFactory;
    if (!ingestionSchema.getIOConfig().isSkipFirehoseCaching() && !(configuredFactory instanceof ReplayableFirehoseFactory)) {
        final boolean reportParseExceptions = ingestionSchema.getTuningConfig().isReportParseExceptions();
        firehoseFactory = new ReplayableFirehoseFactory(configuredFactory, reportParseExceptions, null, null, smileMapper);
    } else {
        firehoseFactory = configuredFactory;
    }

    final Map<Interval, List<ShardSpec>> shardSpecs = determineShardSpecs(toolbox, firehoseFactory);

    final String version;
    final DataSchema dataSchema;
    if (intervalsAreExplicit) {
        version = Iterables.getOnlyElement(getTaskLocks(toolbox)).getVersion();
        dataSchema = ingestionSchema.getDataSchema();
    } else {
        // No intervals were configured: lock everything the shard specs cover.
        final Interval umbrella = JodaUtils.umbrellaInterval(shardSpecs.keySet());
        final TaskLock acquired = toolbox.getTaskActionClient().submit(new LockAcquireAction(umbrella));
        version = acquired.getVersion();
        final DataSchema configuredSchema = ingestionSchema.getDataSchema();
        dataSchema = configuredSchema.withGranularitySpec(
            ingestionSchema.getDataSchema().getGranularitySpec().withIntervals(JodaUtils.condenseIntervals(shardSpecs.keySet()))
        );
    }

    final boolean published = generateAndPublishSegments(toolbox, dataSchema, shardSpecs, version, firehoseFactory);
    return published ? TaskStatus.success(getId()) : TaskStatus.failure(getId());
}
Also used : IngestSegmentFirehoseFactory(io.druid.indexing.firehose.IngestSegmentFirehoseFactory) DataSchema(io.druid.segment.indexing.DataSchema) TaskLock(io.druid.indexing.common.TaskLock) IngestSegmentFirehoseFactory(io.druid.indexing.firehose.IngestSegmentFirehoseFactory) ReplayableFirehoseFactory(io.druid.segment.realtime.firehose.ReplayableFirehoseFactory) FirehoseFactory(io.druid.data.input.FirehoseFactory) LockAcquireAction(io.druid.indexing.common.actions.LockAcquireAction) ReplayableFirehoseFactory(io.druid.segment.realtime.firehose.ReplayableFirehoseFactory) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Interval(org.joda.time.Interval)

Example 2 with ReplayableFirehoseFactory

use of io.druid.segment.realtime.firehose.ReplayableFirehoseFactory in project druid by druid-io.

In the class ReplayableFirehoseFactoryTest, the method testReplayableFirehoseWithNoRetries:

/**
 * Verifies that connecting through a {@link ReplayableFirehoseFactory} built with {@code 0}
 * as its fourth constructor argument lets a {@code TestReadingException} thrown by the
 * delegate firehose's {@code nextRow()} propagate out of the test.
 *
 * NOTE(review): the fourth constructor argument is presumably the retry count (the test
 * name says "NoRetries") and the third a size limit - confirm against the constructor.
 */
@Test(expected = TestReadingException.class)
public void testReplayableFirehoseWithNoRetries() throws Exception {
    replayableFirehoseFactory = new ReplayableFirehoseFactory(delegateFactory, false, 10000, 0, mapper);
    // Record phase: the delegate factory hands out the mocked delegate firehose.
    expect(delegateFactory.connect(parser)).andReturn(delegateFirehose);
    expect(delegateFirehose.hasMore()).andReturn(true).times(2);
    // The read from the delegate fails.
    expect(delegateFirehose.nextRow()).andThrow(new TestReadingException());
    // Void call recorded on the mock: close() is expected on the delegate firehose.
    delegateFirehose.close();
    expectLastCall();
    replayAll();
    // Expected to throw TestReadingException (see the @Test annotation), so verifyAll()
    // below is only reached if the exception is NOT thrown.
    replayableFirehoseFactory.connect(parser);
    verifyAll();
}
Also used : ReplayableFirehoseFactory(io.druid.segment.realtime.firehose.ReplayableFirehoseFactory) Test(org.junit.Test)

Example 3 with ReplayableFirehoseFactory

use of io.druid.segment.realtime.firehose.ReplayableFirehoseFactory in project druid by druid-io.

In the class ReplayableFirehoseFactoryTest, the method testReplayableFirehoseWithMultipleFiles:

/**
 * Feeds 500 copies of each of the first three test rows, followed by one extra row, through a
 * {@link ReplayableFirehoseFactory} and checks that both the first connection and a second
 * (replayed) connection yield exactly that sequence.
 *
 * NOTE(review): the factory is built with {@code 1} as its third constructor argument;
 * judging by the test name this presumably forces the cached rows to span multiple files -
 * confirm against the constructor.
 */
@Test
public void testReplayableFirehoseWithMultipleFiles() throws Exception {
    replayableFirehoseFactory = new ReplayableFirehoseFactory(delegateFactory, false, 1, 3, mapper);

    final int copiesPerRow = 500;
    // Single-element array so the anonymous answers below can flip the flag.
    final boolean[] delegateHasMore = { true };
    final InputRow lastRow = new MapBasedInputRow(DateTime.now(), Lists.newArrayList("dim4", "dim5"), ImmutableMap.<String, Object>of("dim4", "val12", "dim5", "val20", "met1", 30));

    final IAnswer<Boolean> hasMoreAnswer = new IAnswer<Boolean>() {

        @Override
        public Boolean answer() throws Throwable {
            return delegateHasMore[0];
        }
    };
    // Returning the last row also marks the delegate as exhausted.
    final IAnswer<InputRow> lastRowAnswer = new IAnswer<InputRow>() {

        @Override
        public InputRow answer() throws Throwable {
            delegateHasMore[0] = false;
            return lastRow;
        }
    };

    expect(delegateFactory.connect(parser)).andReturn(delegateFirehose);
    expect(delegateFirehose.hasMore()).andAnswer(hasMoreAnswer).anyTimes();
    expect(delegateFirehose.nextRow())
        .andReturn(testRows.get(0)).times(copiesPerRow)
        .andReturn(testRows.get(1)).times(copiesPerRow)
        .andReturn(testRows.get(2)).times(copiesPerRow)
        .andAnswer(lastRowAnswer);
    delegateFirehose.close();
    replayAll();

    // Expected output: every test row repeated copiesPerRow times, then the last row.
    final List<InputRow> expectedRows = Lists.newArrayList();
    for (InputRow row : testRows) {
        int remaining = copiesPerRow;
        while (remaining > 0) {
            expectedRows.add(row);
            remaining--;
        }
    }
    expectedRows.add(lastRow);

    final List<InputRow> firstPass = Lists.newArrayList();
    try (Firehose firehose = replayableFirehoseFactory.connect(parser)) {
        while (firehose.hasMore()) {
            final InputRow next = firehose.nextRow();
            firstPass.add(next);
        }
    }
    Assert.assertEquals(expectedRows, firstPass);

    // now replay!
    final List<InputRow> replayPass = Lists.newArrayList();
    try (Firehose firehose = replayableFirehoseFactory.connect(parser)) {
        while (firehose.hasMore()) {
            final InputRow next = firehose.nextRow();
            replayPass.add(next);
        }
    }
    Assert.assertEquals(expectedRows, replayPass);

    verifyAll();
}
Also used : IAnswer(org.easymock.IAnswer) Firehose(io.druid.data.input.Firehose) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) ReplayableFirehoseFactory(io.druid.segment.realtime.firehose.ReplayableFirehoseFactory) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) Test(org.junit.Test)

Example 4 with ReplayableFirehoseFactory

use of io.druid.segment.realtime.firehose.ReplayableFirehoseFactory in project druid by druid-io.

In the class ReplayableFirehoseFactoryTest, the method testReplayableFirehoseWithoutReportParseExceptions:

/**
 * Checks that, with {@code false} as the factory's second constructor argument, a
 * {@link ParseException} thrown by the delegate firehose in the middle of the stream does not
 * surface: the rows read back are exactly {@code testRows}.
 *
 * NOTE(review): the second constructor argument is presumably the "report parse exceptions"
 * flag (per the test name) - confirm against the constructor.
 */
@Test
public void testReplayableFirehoseWithoutReportParseExceptions() throws Exception {
    replayableFirehoseFactory = new ReplayableFirehoseFactory(delegateFactory, false, 10000, 3, mapper);

    // Single-element array so the anonymous answers below can flip the flag.
    final boolean[] delegateHasMore = { true };

    final IAnswer<Boolean> hasMoreAnswer = new IAnswer<Boolean>() {

        @Override
        public Boolean answer() throws Throwable {
            return delegateHasMore[0];
        }
    };
    // Returning the third row also marks the delegate as exhausted.
    final IAnswer<InputRow> thirdRowAnswer = new IAnswer<InputRow>() {

        @Override
        public InputRow answer() throws Throwable {
            delegateHasMore[0] = false;
            return testRows.get(2);
        }
    };

    expect(delegateFactory.connect(parser)).andReturn(delegateFirehose);
    expect(delegateFirehose.hasMore()).andAnswer(hasMoreAnswer).anyTimes();
    expect(delegateFirehose.nextRow())
        .andReturn(testRows.get(0))
        .andReturn(testRows.get(1))
        .andThrow(new ParseException("unparseable!"))
        .andAnswer(thirdRowAnswer);
    delegateFirehose.close();
    replayAll();

    final List<InputRow> readBack = Lists.newArrayList();
    try (Firehose firehose = replayableFirehoseFactory.connect(parser)) {
        while (firehose.hasMore()) {
            final InputRow next = firehose.nextRow();
            readBack.add(next);
        }
    }
    Assert.assertEquals(testRows, readBack);

    verifyAll();
}
Also used : IAnswer(org.easymock.IAnswer) Firehose(io.druid.data.input.Firehose) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) ReplayableFirehoseFactory(io.druid.segment.realtime.firehose.ReplayableFirehoseFactory) ParseException(io.druid.java.util.common.parsers.ParseException) Test(org.junit.Test)

Aggregations

ReplayableFirehoseFactory (io.druid.segment.realtime.firehose.ReplayableFirehoseFactory)4 Test (org.junit.Test)3 Firehose (io.druid.data.input.Firehose)2 InputRow (io.druid.data.input.InputRow)2 MapBasedInputRow (io.druid.data.input.MapBasedInputRow)2 IAnswer (org.easymock.IAnswer)2 ImmutableList (com.google.common.collect.ImmutableList)1 FirehoseFactory (io.druid.data.input.FirehoseFactory)1 TaskLock (io.druid.indexing.common.TaskLock)1 LockAcquireAction (io.druid.indexing.common.actions.LockAcquireAction)1 IngestSegmentFirehoseFactory (io.druid.indexing.firehose.IngestSegmentFirehoseFactory)1 ParseException (io.druid.java.util.common.parsers.ParseException)1 DataSchema (io.druid.segment.indexing.DataSchema)1 List (java.util.List)1 Interval (org.joda.time.Interval)1