use of org.apache.druid.indexing.kafka.KafkaDataSourceMetadata in project druid by druid-io.
the class KafkaSupervisorTest method testDoNotKillCompatibleTasks.
@Test
public void testDoNotKillCompatibleTasks() throws Exception {
// This supervisor always returns true for isTaskCurrent -> it should not kill its tasks
int numReplicas = 2;
supervisor = getTestableSupervisorCustomIsTaskCurrent(numReplicas, 1, true, "PT1H", new Period("P1D"), new Period("P1D"), false, true);
addSomeEvents(1);
Task task = createKafkaIndexTask("id1", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>("topic", ImmutableMap.of(0, 0L, 2, 0L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>("topic", ImmutableMap.of(0, Long.MAX_VALUE, 2, Long.MAX_VALUE)), null, null, supervisor.getTuningConfig());
List<Task> existingTasks = ImmutableList.of(task);
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(existingTasks).anyTimes();
EasyMock.expect(taskStorage.getStatus("id1")).andReturn(Optional.of(TaskStatus.running("id1"))).anyTimes();
EasyMock.expect(taskStorage.getTask("id1")).andReturn(Optional.of(task)).anyTimes();
EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(Status.NOT_STARTED)).anyTimes();
EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
EasyMock.expect(taskQueue.add(EasyMock.anyObject(Task.class))).andReturn(true);
TreeMap<Integer, Map<Integer, Long>> checkpoints1 = new TreeMap<>();
checkpoints1.put(0, ImmutableMap.of(0, 0L, 2, 0L));
EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("id1"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints1)).times(numReplicas);
replayAll();
supervisor.start();
supervisor.runInternal();
verifyAll();
}
use of org.apache.druid.indexing.kafka.KafkaDataSourceMetadata in project druid by druid-io.
the class KafkaSupervisorTest method testSuspendedRunningTasks.
@Test
public void testSuspendedRunningTasks() throws Exception {
// graceful shutdown is expected to be called on running tasks since state is suspended
final TaskLocation location1 = new TaskLocation("testHost", 1234, -1);
final TaskLocation location2 = new TaskLocation("testHost2", 145, -1);
final DateTime startTime = DateTimes.nowUtc();
supervisor = getTestableSupervisor(2, 1, true, "PT1H", null, null, true, kafkaHost);
final KafkaSupervisorTuningConfig tuningConfig = supervisor.getTuningConfig();
addSomeEvents(1);
Task id1 = createKafkaIndexTask("id1", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>("topic", ImmutableMap.of(0, 0L, 1, 0L, 2, 0L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>("topic", ImmutableMap.of(0, Long.MAX_VALUE, 1, Long.MAX_VALUE, 2, Long.MAX_VALUE)), null, null, tuningConfig);
Task id2 = createKafkaIndexTask("id2", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>("topic", ImmutableMap.of(0, 10L, 1, 20L, 2, 30L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>("topic", ImmutableMap.of(0, Long.MAX_VALUE, 1, Long.MAX_VALUE, 2, Long.MAX_VALUE)), null, null, tuningConfig);
Task id3 = createKafkaIndexTask("id3", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>("topic", ImmutableMap.of(0, 10L, 1, 20L, 2, 30L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>("topic", ImmutableMap.of(0, Long.MAX_VALUE, 1, Long.MAX_VALUE, 2, Long.MAX_VALUE)), null, null, tuningConfig);
Collection workItems = new ArrayList<>();
workItems.add(new TestTaskRunnerWorkItem(id1, null, location1));
workItems.add(new TestTaskRunnerWorkItem(id2, null, location2));
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
EasyMock.expect(taskRunner.getRunningTasks()).andReturn(workItems).anyTimes();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of(id1, id2, id3)).anyTimes();
EasyMock.expect(taskStorage.getStatus("id1")).andReturn(Optional.of(TaskStatus.running("id1"))).anyTimes();
EasyMock.expect(taskStorage.getStatus("id2")).andReturn(Optional.of(TaskStatus.running("id2"))).anyTimes();
EasyMock.expect(taskStorage.getStatus("id3")).andReturn(Optional.of(TaskStatus.running("id3"))).anyTimes();
EasyMock.expect(taskStorage.getTask("id1")).andReturn(Optional.of(id1)).anyTimes();
EasyMock.expect(taskStorage.getTask("id2")).andReturn(Optional.of(id2)).anyTimes();
EasyMock.expect(taskStorage.getTask("id3")).andReturn(Optional.of(id3)).anyTimes();
EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
EasyMock.expect(taskClient.getStatusAsync("id1")).andReturn(Futures.immediateFuture(Status.PUBLISHING));
EasyMock.expect(taskClient.getStatusAsync("id2")).andReturn(Futures.immediateFuture(Status.READING));
EasyMock.expect(taskClient.getStatusAsync("id3")).andReturn(Futures.immediateFuture(Status.READING));
EasyMock.expect(taskClient.getStartTimeAsync("id2")).andReturn(Futures.immediateFuture(startTime));
EasyMock.expect(taskClient.getStartTimeAsync("id3")).andReturn(Futures.immediateFuture(startTime));
EasyMock.expect(taskClient.getEndOffsets("id1")).andReturn(ImmutableMap.of(0, 10L, 1, 20L, 2, 30L));
// getCheckpoints will not be called for id1 as it is in publishing state
TreeMap<Integer, Map<Integer, Long>> checkpoints = new TreeMap<>();
checkpoints.put(0, ImmutableMap.of(0, 10L, 1, 20L, 2, 30L));
EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("id2"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints)).times(1);
EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("id3"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints)).times(1);
taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
EasyMock.expect(taskClient.pauseAsync("id2")).andReturn(Futures.immediateFuture(ImmutableMap.of(0, 15L, 1, 25L, 2, 30L)));
EasyMock.expect(taskClient.setEndOffsetsAsync("id2", ImmutableMap.of(0, 15L, 1, 25L, 2, 30L), true)).andReturn(Futures.immediateFuture(true));
taskQueue.shutdown("id3", "Killing task for graceful shutdown");
EasyMock.expectLastCall().times(1);
taskQueue.shutdown("id3", "Killing task [%s] which hasn't been assigned to a worker", "id3");
EasyMock.expectLastCall().times(1);
replayAll();
supervisor.start();
supervisor.runInternal();
verifyAll();
}
use of org.apache.druid.indexing.kafka.KafkaDataSourceMetadata in project druid by druid-io.
the class KafkaSupervisorTest method testDatasourceMetadata.
/**
* Test generating the starting offsets from the partition data stored in druid_dataSource which contains the
* offsets of the last built segments.
*/
@Test
public void testDatasourceMetadata() throws Exception {
supervisor = getTestableSupervisor(1, 1, true, "PT1H", null, null);
addSomeEvents(100);
Capture<KafkaIndexTask> captured = Capture.newInstance();
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.absent()).anyTimes();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 10L, 1, 20L, 2, 30L), ImmutableSet.of()))).anyTimes();
EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true);
replayAll();
supervisor.start();
supervisor.runInternal();
verifyAll();
KafkaIndexTask task = captured.getValue();
KafkaIndexTaskIOConfig taskConfig = task.getIOConfig();
Assert.assertEquals("sequenceName-0", taskConfig.getBaseSequenceName());
Assert.assertEquals(10L, taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(0).longValue());
Assert.assertEquals(20L, taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(1).longValue());
Assert.assertEquals(30L, taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(2).longValue());
}
use of org.apache.druid.indexing.kafka.KafkaDataSourceMetadata in project druid by druid-io.
the class KafkaSupervisorTest method testDiscoverExistingPublishingTask.
@Test
public void testDiscoverExistingPublishingTask() throws Exception {
final TaskLocation location = new TaskLocation("testHost", 1234, -1);
supervisor = getTestableSupervisor(1, 1, true, "PT1H", null, null);
final KafkaSupervisorTuningConfig tuningConfig = supervisor.getTuningConfig();
addSomeEvents(1);
Task task = createKafkaIndexTask("id1", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>("topic", ImmutableMap.of(0, 0L, 1, 0L, 2, 0L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>("topic", ImmutableMap.of(0, Long.MAX_VALUE, 1, Long.MAX_VALUE, 2, Long.MAX_VALUE)), null, null, supervisor.getTuningConfig());
Collection workItems = new ArrayList<>();
workItems.add(new TestTaskRunnerWorkItem(task, null, location));
Capture<KafkaIndexTask> captured = Capture.newInstance();
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
EasyMock.expect(taskRunner.getRunningTasks()).andReturn(workItems).anyTimes();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of(task)).anyTimes();
EasyMock.expect(taskStorage.getStatus("id1")).andReturn(Optional.of(TaskStatus.running("id1"))).anyTimes();
EasyMock.expect(taskStorage.getTask("id1")).andReturn(Optional.of(task)).anyTimes();
EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
EasyMock.expect(taskClient.getStatusAsync("id1")).andReturn(Futures.immediateFuture(Status.PUBLISHING));
EasyMock.expect(taskClient.getCurrentOffsetsAsync("id1", false)).andReturn(Futures.immediateFuture(ImmutableMap.of(0, 10L, 1, 20L, 2, 30L)));
EasyMock.expect(taskClient.getEndOffsets("id1")).andReturn(ImmutableMap.of(0, 10L, 1, 20L, 2, 30L));
EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true);
TreeMap<Integer, Map<Integer, Long>> checkpoints = new TreeMap<>();
checkpoints.put(0, ImmutableMap.of(0, 0L, 1, 0L, 2, 0L));
EasyMock.expect(taskClient.getCheckpoints(EasyMock.anyString(), EasyMock.anyBoolean())).andReturn(checkpoints).anyTimes();
taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
replayAll();
supervisor.start();
supervisor.runInternal();
supervisor.updateCurrentAndLatestOffsets();
SupervisorReport<KafkaSupervisorReportPayload> report = supervisor.getStatus();
verifyAll();
Assert.assertEquals(DATASOURCE, report.getId());
KafkaSupervisorReportPayload payload = report.getPayload();
Assert.assertEquals(DATASOURCE, payload.getDataSource());
Assert.assertEquals(3600L, payload.getDurationSeconds());
Assert.assertEquals(NUM_PARTITIONS, payload.getPartitions());
Assert.assertEquals(1, payload.getReplicas());
Assert.assertEquals(topic, payload.getStream());
Assert.assertEquals(0, payload.getActiveTasks().size());
Assert.assertEquals(1, payload.getPublishingTasks().size());
Assert.assertEquals(SupervisorStateManager.BasicState.RUNNING, payload.getDetailedState());
Assert.assertEquals(0, payload.getRecentErrors().size());
TaskReportData publishingReport = payload.getPublishingTasks().get(0);
Assert.assertEquals("id1", publishingReport.getId());
Assert.assertEquals(ImmutableMap.of(0, 0L, 1, 0L, 2, 0L), publishingReport.getStartingOffsets());
Assert.assertEquals(ImmutableMap.of(0, 10L, 1, 20L, 2, 30L), publishingReport.getCurrentOffsets());
KafkaIndexTask capturedTask = captured.getValue();
Assert.assertEquals(dataSchema, capturedTask.getDataSchema());
Assert.assertEquals(tuningConfig.convertToTaskTuningConfig(), capturedTask.getTuningConfig());
KafkaIndexTaskIOConfig capturedTaskConfig = capturedTask.getIOConfig();
Assert.assertEquals(kafkaHost, capturedTaskConfig.getConsumerProperties().get("bootstrap.servers"));
Assert.assertEquals("myCustomValue", capturedTaskConfig.getConsumerProperties().get("myCustomKey"));
Assert.assertEquals("sequenceName-0", capturedTaskConfig.getBaseSequenceName());
Assert.assertTrue("isUseTransaction", capturedTaskConfig.isUseTransaction());
// check that the new task was created with starting offsets matching where the publishing task finished
Assert.assertEquals(topic, capturedTaskConfig.getStartSequenceNumbers().getStream());
Assert.assertEquals(10L, capturedTaskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(0).longValue());
Assert.assertEquals(20L, capturedTaskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(1).longValue());
Assert.assertEquals(30L, capturedTaskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(2).longValue());
Assert.assertEquals(topic, capturedTaskConfig.getEndSequenceNumbers().getStream());
Assert.assertEquals(Long.MAX_VALUE, capturedTaskConfig.getEndSequenceNumbers().getPartitionSequenceNumberMap().get(0).longValue());
Assert.assertEquals(Long.MAX_VALUE, capturedTaskConfig.getEndSequenceNumbers().getPartitionSequenceNumberMap().get(1).longValue());
Assert.assertEquals(Long.MAX_VALUE, capturedTaskConfig.getEndSequenceNumbers().getPartitionSequenceNumberMap().get(2).longValue());
}
use of org.apache.druid.indexing.kafka.KafkaDataSourceMetadata in project druid by druid-io.
the class KafkaSupervisorTest method testDontKillTasksWithMismatchedType.
@Test
public void testDontKillTasksWithMismatchedType() throws Exception {
supervisor = getTestableSupervisor(2, 1, true, "PT1H", null, null);
addSomeEvents(1);
// non KafkaIndexTask (don't kill)
Task id2 = new RealtimeIndexTask("id2", null, new FireDepartment(dataSchema, new RealtimeIOConfig(null, null), null), null);
List<Task> existingTasks = ImmutableList.of(id2);
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(existingTasks).anyTimes();
EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(Status.NOT_STARTED)).anyTimes();
EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
EasyMock.expect(taskQueue.add(EasyMock.anyObject(Task.class))).andReturn(true).anyTimes();
replayAll();
supervisor.start();
supervisor.runInternal();
verifyAll();
}
Aggregations