use of io.druid.segment.realtime.FireDepartment in project druid by druid-io.
the class RealtimeIndexTask method run.
@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
runThread = Thread.currentThread();
if (this.plumber != null) {
throw new IllegalStateException("WTF?!? run with non-null plumber??!");
}
boolean normalExit = true;
// It would be nice to get the PlumberSchool in the constructor. Although that will need jackson injectables for
// stuff like the ServerView, which seems kind of odd? Perhaps revisit this when Guice has been introduced.
final SegmentPublisher segmentPublisher = new TaskActionSegmentPublisher(this, toolbox);
// NOTE: We talk to the coordinator in various places in the plumber and we could be more robust to issues
// with the coordinator. Right now, we'll block/throw in whatever thread triggered the coordinator behavior,
// which will typically be either the main data processing loop or the persist thread.
// Wrap default DataSegmentAnnouncer such that we unlock intervals as we unannounce segments
final DataSegmentAnnouncer lockingSegmentAnnouncer = new DataSegmentAnnouncer() {
@Override
public void announceSegment(final DataSegment segment) throws IOException {
// Side effect: Calling announceSegment causes a lock to be acquired
toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval()));
toolbox.getSegmentAnnouncer().announceSegment(segment);
}
@Override
public void unannounceSegment(final DataSegment segment) throws IOException {
try {
toolbox.getSegmentAnnouncer().unannounceSegment(segment);
} finally {
toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
}
}
@Override
public void announceSegments(Iterable<DataSegment> segments) throws IOException {
// Side effect: Calling announceSegments causes locks to be acquired
for (DataSegment segment : segments) {
toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval()));
}
toolbox.getSegmentAnnouncer().announceSegments(segments);
}
@Override
public void unannounceSegments(Iterable<DataSegment> segments) throws IOException {
try {
toolbox.getSegmentAnnouncer().unannounceSegments(segments);
} finally {
for (DataSegment segment : segments) {
toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
}
}
}
@Override
public boolean isAnnounced(DataSegment segment) {
return toolbox.getSegmentAnnouncer().isAnnounced(segment);
}
};
// NOTE: getVersion will block if there is lock contention, which will block plumber.getSink
// NOTE: (and thus the firehose)
// Shouldn't usually happen, since we don't expect people to submit tasks that intersect with the
// realtime window, but if they do it can be problematic. If we decide to care, we can use more threads in
// the plumber such that waiting for the coordinator doesn't block data processing.
final VersioningPolicy versioningPolicy = new VersioningPolicy() {
@Override
public String getVersion(final Interval interval) {
try {
// Side effect: Calling getVersion causes a lock to be acquired
final TaskLock myLock = toolbox.getTaskActionClient().submit(new LockAcquireAction(interval));
return myLock.getVersion();
} catch (IOException e) {
throw Throwables.propagate(e);
}
}
};
DataSchema dataSchema = spec.getDataSchema();
RealtimeIOConfig realtimeIOConfig = spec.getIOConfig();
RealtimeTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(new File(toolbox.getTaskWorkDir(), "persist")).withVersioningPolicy(versioningPolicy);
final FireDepartment fireDepartment = new FireDepartment(dataSchema, realtimeIOConfig, tuningConfig);
this.metrics = fireDepartment.getMetrics();
final RealtimeMetricsMonitor metricsMonitor = new RealtimeMetricsMonitor(ImmutableList.of(fireDepartment), ImmutableMap.of(DruidMetrics.TASK_ID, new String[] { getId() }));
this.queryRunnerFactoryConglomerate = toolbox.getQueryRunnerFactoryConglomerate();
// NOTE: This pusher selects path based purely on global configuration and the DataSegment, which means
// NOTE: that redundant realtime tasks will upload to the same location. This can cause index.zip
// NOTE: (partitionNum_index.zip for HDFS data storage) and descriptor.json (partitionNum_descriptor.json for
// NOTE: HDFS data storage) to mismatch, or it can cause historical nodes to load different instances of
// NOTE: the "same" segment.
final PlumberSchool plumberSchool = new RealtimePlumberSchool(toolbox.getEmitter(), toolbox.getQueryRunnerFactoryConglomerate(), toolbox.getSegmentPusher(), lockingSegmentAnnouncer, segmentPublisher, toolbox.getSegmentHandoffNotifierFactory(), toolbox.getQueryExecutorService(), toolbox.getIndexMerger(), toolbox.getIndexMergerV9(), toolbox.getIndexIO(), toolbox.getCache(), toolbox.getCacheConfig(), toolbox.getObjectMapper());
this.plumber = plumberSchool.findPlumber(dataSchema, tuningConfig, metrics);
Supplier<Committer> committerSupplier = null;
try {
plumber.startJob();
// Set up metrics emission
toolbox.getMonitorScheduler().addMonitor(metricsMonitor);
// Delay firehose connection to avoid claiming input resources while the plumber is starting up.
final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
// Skip connecting firehose if we've been stopped before we got started.
synchronized (this) {
if (!gracefullyStopped) {
firehose = firehoseFactory.connect(spec.getDataSchema().getParser());
committerSupplier = Committers.supplierFromFirehose(firehose);
}
}
// Time to read data!
while (firehose != null && (!gracefullyStopped || firehoseDrainableByClosing) && firehose.hasMore()) {
Plumbers.addNextRow(committerSupplier, firehose, plumber, tuningConfig.isReportParseExceptions(), metrics);
}
} catch (Throwable e) {
normalExit = false;
log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
throw e;
} finally {
if (normalExit) {
try {
// Persist if we had actually started.
if (firehose != null) {
log.info("Persisting remaining data.");
final Committer committer = committerSupplier.get();
final CountDownLatch persistLatch = new CountDownLatch(1);
plumber.persist(new Committer() {
@Override
public Object getMetadata() {
return committer.getMetadata();
}
@Override
public void run() {
try {
committer.run();
} finally {
persistLatch.countDown();
}
}
});
persistLatch.await();
}
if (gracefullyStopped) {
log.info("Gracefully stopping.");
} else {
log.info("Finishing the job.");
synchronized (this) {
if (gracefullyStopped) {
// Someone called stopGracefully after we checked the flag. That's okay, just stop now.
log.info("Gracefully stopping.");
} else {
finishingJob = true;
}
}
if (finishingJob) {
plumber.finishJob();
}
}
} catch (InterruptedException e) {
log.debug(e, "Interrupted while finishing the job");
} catch (Exception e) {
log.makeAlert(e, "Failed to finish realtime task").emit();
throw e;
} finally {
if (firehose != null) {
CloseQuietly.close(firehose);
}
toolbox.getMonitorScheduler().removeMonitor(metricsMonitor);
}
}
}
log.info("Job done!");
return TaskStatus.success(getId());
}
use of io.druid.segment.realtime.FireDepartment in project druid by druid-io.
the class RealtimeIndexTaskTest method makeRealtimeTask.
private RealtimeIndexTask makeRealtimeTask(final String taskId, boolean reportParseExceptions, long handoffTimeout) {
ObjectMapper objectMapper = new DefaultObjectMapper();
DataSchema dataSchema = new DataSchema("test_ds", null, new AggregatorFactory[] { new CountAggregatorFactory("rows"), new LongSumAggregatorFactory("met1", "met1") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null), objectMapper);
RealtimeIOConfig realtimeIOConfig = new RealtimeIOConfig(new TestFirehoseFactory(), null, null);
RealtimeTuningConfig realtimeTuningConfig = new RealtimeTuningConfig(1000, new Period("P1Y"), new Period("PT10M"), null, null, new ServerTimeRejectionPolicyFactory(), null, null, null, buildV9Directly, 0, 0, reportParseExceptions, handoffTimeout);
return new RealtimeIndexTask(taskId, null, new FireDepartment(dataSchema, realtimeIOConfig, realtimeTuningConfig), null) {
@Override
protected boolean isFirehoseDrainableByClosing(FirehoseFactory firehoseFactory) {
return true;
}
};
}
use of io.druid.segment.realtime.FireDepartment in project druid by druid-io.
the class KafkaSupervisorTest method testKillIncompatibleTasks.
@Test
public void testKillIncompatibleTasks() throws Exception {
supervisor = getSupervisor(2, 1, true, "PT1H", null);
addSomeEvents(1);
Task id1 = createKafkaIndexTask("id1", DATASOURCE, "index_kafka_testDS__some_other_sequenceName", new KafkaPartitions("topic", ImmutableMap.of(0, 0L)), new KafkaPartitions("topic", ImmutableMap.of(0, 10L)), null);
Task id2 = createKafkaIndexTask("id2", DATASOURCE, "sequenceName-0", new KafkaPartitions("topic", ImmutableMap.of(0, 0L, 1, 0L, 2, 0L)), new KafkaPartitions("topic", ImmutableMap.of(0, 333L, 1, 333L, 2, 333L)), null);
Task id3 = createKafkaIndexTask("id3", DATASOURCE, "index_kafka_testDS__some_other_sequenceName", new KafkaPartitions("topic", ImmutableMap.of(0, 0L, 1, 0L, 2, 1L)), new KafkaPartitions("topic", ImmutableMap.of(0, 333L, 1, 333L, 2, 330L)), null);
Task id4 = createKafkaIndexTask("id4", "other-datasource", "index_kafka_testDS_d927edff33c4b3f", new KafkaPartitions("topic", ImmutableMap.of(0, 0L)), new KafkaPartitions("topic", ImmutableMap.of(0, 10L)), null);
Task id5 = new // non KafkaIndexTask (don't kill)
RealtimeIndexTask("id5", null, new FireDepartment(dataSchema, new RealtimeIOConfig(null, null, null), null), null);
List<Task> existingTasks = ImmutableList.of(id1, id2, id3, id4, id5);
expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
expect(taskRunner.getRunningTasks()).andReturn(Collections.EMPTY_LIST).anyTimes();
expect(taskStorage.getActiveTasks()).andReturn(existingTasks).anyTimes();
expect(taskStorage.getStatus("id1")).andReturn(Optional.of(TaskStatus.running("id1"))).anyTimes();
expect(taskStorage.getStatus("id2")).andReturn(Optional.of(TaskStatus.running("id2"))).anyTimes();
expect(taskStorage.getStatus("id3")).andReturn(Optional.of(TaskStatus.running("id3"))).anyTimes();
expect(taskStorage.getTask("id1")).andReturn(Optional.of(id1)).anyTimes();
expect(taskStorage.getTask("id2")).andReturn(Optional.of(id2)).anyTimes();
expect(taskStorage.getTask("id3")).andReturn(Optional.of(id3)).anyTimes();
expect(taskClient.getStatusAsync(anyString())).andReturn(Futures.immediateFuture(KafkaIndexTask.Status.NOT_STARTED)).anyTimes();
expect(taskClient.getStartTimeAsync(anyString())).andReturn(Futures.immediateFuture(DateTime.now())).anyTimes();
expect(indexerMetadataStorageCoordinator.getDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
expect(taskClient.stopAsync("id1", false)).andReturn(Futures.immediateFuture(true));
expect(taskClient.stopAsync("id3", false)).andReturn(Futures.immediateFuture(false));
taskRunner.registerListener(anyObject(TaskRunnerListener.class), anyObject(Executor.class));
taskQueue.shutdown("id3");
expect(taskQueue.add(anyObject(Task.class))).andReturn(true);
replayAll();
supervisor.start();
supervisor.runInternal();
verifyAll();
}
use of io.druid.segment.realtime.FireDepartment in project druid by druid-io.
the class TaskSerdeTest method testRealtimeIndexTaskSerde.
@Test
public void testRealtimeIndexTaskSerde() throws Exception {
final RealtimeIndexTask task = new RealtimeIndexTask(null, new TaskResource("rofl", 2), new FireDepartment(new DataSchema("foo", null, new AggregatorFactory[0], new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null), jsonMapper), new RealtimeIOConfig(new LocalFirehoseFactory(new File("lol"), "rofl", null), new PlumberSchool() {
@Override
public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
return null;
}
}, null), new RealtimeTuningConfig(1, new Period("PT10M"), null, null, null, null, 1, NoneShardSpec.instance(), indexSpec, null, 0, 0, true, null)), null);
final String json = jsonMapper.writeValueAsString(task);
// Just want to run the clock a bit to make sure the task id doesn't change
Thread.sleep(100);
final RealtimeIndexTask task2 = (RealtimeIndexTask) jsonMapper.readValue(json, Task.class);
Assert.assertEquals("foo", task.getDataSource());
Assert.assertEquals(2, task.getTaskResource().getRequiredCapacity());
Assert.assertEquals("rofl", task.getTaskResource().getAvailabilityGroup());
Assert.assertEquals(new Period("PT10M"), task.getRealtimeIngestionSchema().getTuningConfig().getWindowPeriod());
Assert.assertEquals(Granularities.HOUR, task.getRealtimeIngestionSchema().getDataSchema().getGranularitySpec().getSegmentGranularity());
Assert.assertTrue(task.getRealtimeIngestionSchema().getTuningConfig().isReportParseExceptions());
Assert.assertEquals(task.getId(), task2.getId());
Assert.assertEquals(task.getGroupId(), task2.getGroupId());
Assert.assertEquals(task.getDataSource(), task2.getDataSource());
Assert.assertEquals(task.getTaskResource().getRequiredCapacity(), task2.getTaskResource().getRequiredCapacity());
Assert.assertEquals(task.getTaskResource().getAvailabilityGroup(), task2.getTaskResource().getAvailabilityGroup());
Assert.assertEquals(task.getRealtimeIngestionSchema().getTuningConfig().getWindowPeriod(), task2.getRealtimeIngestionSchema().getTuningConfig().getWindowPeriod());
Assert.assertEquals(task.getRealtimeIngestionSchema().getDataSchema().getGranularitySpec().getSegmentGranularity(), task2.getRealtimeIngestionSchema().getDataSchema().getGranularitySpec().getSegmentGranularity());
}
Aggregations