use of org.apache.druid.indexing.kafka.KafkaIndexTask in project druid by druid-io.
the class KafkaSupervisorTest method testFailedInitializationAndRecovery.
@Test
public void testFailedInitializationAndRecovery() throws Exception {
// Block the supervisor initialization with a bad hostname config, make sure this doesn't block the lifecycle
supervisor = getTestableSupervisor(1, 1, true, "PT1H", null, null, false, StringUtils.format("badhostname:%d", kafkaServer.getPort()));
final KafkaSupervisorTuningConfig tuningConfig = supervisor.getTuningConfig();
addSomeEvents(1);
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
replayAll();
supervisor.start();
Assert.assertTrue(supervisor.isLifecycleStarted());
Assert.assertFalse(supervisor.isStarted());
verifyAll();
while (supervisor.getInitRetryCounter() < 3) {
Thread.sleep(1000);
}
// Portion below is the same test as testNoInitialState(), testing the supervisor after the initialiation is fixed
resetAll();
Capture<KafkaIndexTask> captured = Capture.newInstance();
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true);
taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
replayAll();
// Fix the bad hostname during the initialization retries and finish the supervisor start.
// This is equivalent to supervisor.start() in testNoInitialState().
// The test supervisor has a P1D period, so we need to manually trigger the initialization retry.
supervisor.getIoConfig().getConsumerProperties().put("bootstrap.servers", kafkaHost);
supervisor.tryInit();
Assert.assertTrue(supervisor.isLifecycleStarted());
Assert.assertTrue(supervisor.isStarted());
supervisor.runInternal();
verifyAll();
KafkaIndexTask task = captured.getValue();
Assert.assertEquals(dataSchema, task.getDataSchema());
Assert.assertEquals(tuningConfig.convertToTaskTuningConfig(), task.getTuningConfig());
KafkaIndexTaskIOConfig taskConfig = task.getIOConfig();
Assert.assertEquals(kafkaHost, taskConfig.getConsumerProperties().get("bootstrap.servers"));
Assert.assertEquals("myCustomValue", taskConfig.getConsumerProperties().get("myCustomKey"));
Assert.assertEquals("sequenceName-0", taskConfig.getBaseSequenceName());
Assert.assertTrue("isUseTransaction", taskConfig.isUseTransaction());
Assert.assertFalse("minimumMessageTime", taskConfig.getMinimumMessageTime().isPresent());
Assert.assertFalse("maximumMessageTime", taskConfig.getMaximumMessageTime().isPresent());
Assert.assertEquals(topic, taskConfig.getStartSequenceNumbers().getStream());
Assert.assertEquals(0L, taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(0).longValue());
Assert.assertEquals(0L, taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(1).longValue());
Assert.assertEquals(0L, taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(2).longValue());
Assert.assertEquals(topic, taskConfig.getEndSequenceNumbers().getStream());
Assert.assertEquals(Long.MAX_VALUE, taskConfig.getEndSequenceNumbers().getPartitionSequenceNumberMap().get(0).longValue());
Assert.assertEquals(Long.MAX_VALUE, taskConfig.getEndSequenceNumbers().getPartitionSequenceNumberMap().get(1).longValue());
Assert.assertEquals(Long.MAX_VALUE, taskConfig.getEndSequenceNumbers().getPartitionSequenceNumberMap().get(2).longValue());
}
use of org.apache.druid.indexing.kafka.KafkaIndexTask in project druid by druid-io.
the class KafkaSupervisorTest method testNoInitialState.
@Test
public void testNoInitialState() throws Exception {
supervisor = getTestableSupervisor(1, 1, true, "PT1H", null, null);
final KafkaSupervisorTuningConfig tuningConfig = supervisor.getTuningConfig();
addSomeEvents(1);
Capture<KafkaIndexTask> captured = Capture.newInstance();
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true);
taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
replayAll();
supervisor.start();
supervisor.runInternal();
verifyAll();
KafkaIndexTask task = captured.getValue();
Assert.assertEquals(dataSchema, task.getDataSchema());
Assert.assertEquals(tuningConfig.convertToTaskTuningConfig(), task.getTuningConfig());
KafkaIndexTaskIOConfig taskConfig = task.getIOConfig();
Assert.assertEquals(kafkaHost, taskConfig.getConsumerProperties().get("bootstrap.servers"));
Assert.assertEquals("myCustomValue", taskConfig.getConsumerProperties().get("myCustomKey"));
Assert.assertEquals("sequenceName-0", taskConfig.getBaseSequenceName());
Assert.assertTrue("isUseTransaction", taskConfig.isUseTransaction());
Assert.assertFalse("minimumMessageTime", taskConfig.getMinimumMessageTime().isPresent());
Assert.assertFalse("maximumMessageTime", taskConfig.getMaximumMessageTime().isPresent());
Assert.assertEquals(topic, taskConfig.getStartSequenceNumbers().getStream());
Assert.assertEquals(0L, (long) taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(0));
Assert.assertEquals(0L, (long) taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(1));
Assert.assertEquals(0L, (long) taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(2));
Assert.assertEquals(topic, taskConfig.getEndSequenceNumbers().getStream());
Assert.assertEquals(Long.MAX_VALUE, (long) taskConfig.getEndSequenceNumbers().getPartitionSequenceNumberMap().get(0));
Assert.assertEquals(Long.MAX_VALUE, (long) taskConfig.getEndSequenceNumbers().getPartitionSequenceNumberMap().get(1));
Assert.assertEquals(Long.MAX_VALUE, (long) taskConfig.getEndSequenceNumbers().getPartitionSequenceNumberMap().get(2));
}
use of org.apache.druid.indexing.kafka.KafkaIndexTask in project druid by druid-io.
the class KafkaSupervisorTest method testCheckpointForInactiveTaskGroup.
@Test(timeout = 60_000L)
public void testCheckpointForInactiveTaskGroup() throws InterruptedException {
supervisor = getTestableSupervisor(2, 1, true, "PT1S", null, null);
final KafkaSupervisorTuningConfig tuningConfig = supervisor.getTuningConfig();
supervisor.getStateManager().markRunFinished();
// not adding any events
final KafkaIndexTask id1 = createKafkaIndexTask("id1", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 0L, 1, 0L, 2, 0L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, Long.MAX_VALUE, 1, Long.MAX_VALUE, 2, Long.MAX_VALUE)), null, null, tuningConfig);
final Task id2 = createKafkaIndexTask("id2", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 10L, 1, 20L, 2, 30L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, Long.MAX_VALUE, 1, Long.MAX_VALUE, 2, Long.MAX_VALUE)), null, null, tuningConfig);
final Task id3 = createKafkaIndexTask("id3", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 10L, 1, 20L, 2, 30L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, Long.MAX_VALUE, 1, Long.MAX_VALUE, 2, Long.MAX_VALUE)), null, null, tuningConfig);
final TaskLocation location1 = new TaskLocation("testHost", 1234, -1);
final TaskLocation location2 = new TaskLocation("testHost2", 145, -1);
Collection workItems = new ArrayList<>();
workItems.add(new TestTaskRunnerWorkItem(id1, null, location1));
workItems.add(new TestTaskRunnerWorkItem(id2, null, location2));
workItems.add(new TestTaskRunnerWorkItem(id2, null, location2));
EasyMock.expect(taskRunner.getRunningTasks()).andReturn(workItems).anyTimes();
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of(id1, id2, id3)).anyTimes();
EasyMock.expect(taskStorage.getStatus("id1")).andReturn(Optional.of(TaskStatus.running("id1"))).anyTimes();
EasyMock.expect(taskStorage.getStatus("id2")).andReturn(Optional.of(TaskStatus.running("id2"))).anyTimes();
EasyMock.expect(taskStorage.getStatus("id3")).andReturn(Optional.of(TaskStatus.running("id3"))).anyTimes();
EasyMock.expect(taskStorage.getTask("id1")).andReturn(Optional.of(id1)).anyTimes();
EasyMock.expect(taskStorage.getTask("id2")).andReturn(Optional.of(id2)).anyTimes();
EasyMock.expect(taskStorage.getTask("id3")).andReturn(Optional.of(id3)).anyTimes();
EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
EasyMock.expect(taskClient.getStatusAsync("id1")).andReturn(Futures.immediateFuture(Status.READING));
EasyMock.expect(taskClient.getStatusAsync("id2")).andReturn(Futures.immediateFuture(Status.READING));
EasyMock.expect(taskClient.getStatusAsync("id3")).andReturn(Futures.immediateFuture(Status.READING));
final DateTime startTime = DateTimes.nowUtc();
EasyMock.expect(taskClient.getStartTimeAsync("id1")).andReturn(Futures.immediateFuture(startTime));
EasyMock.expect(taskClient.getStartTimeAsync("id2")).andReturn(Futures.immediateFuture(startTime));
EasyMock.expect(taskClient.getStartTimeAsync("id3")).andReturn(Futures.immediateFuture(startTime));
final TreeMap<Integer, Map<Integer, Long>> checkpoints = new TreeMap<>();
checkpoints.put(0, ImmutableMap.of(0, 10L, 1, 20L, 2, 30L));
EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("id1"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints)).times(1);
EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("id2"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints)).times(1);
EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("id3"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints)).times(1);
taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
replayAll();
supervisor.start();
supervisor.runInternal();
supervisor.moveTaskGroupToPendingCompletion(0);
supervisor.checkpoint(0, new KafkaDataSourceMetadata(new SeekableStreamStartSequenceNumbers<>(topic, checkpoints.get(0), ImmutableSet.of())));
while (supervisor.getNoticesQueueSize() > 0) {
Thread.sleep(100);
}
verifyAll();
Assert.assertNull(serviceEmitter.getStackTrace(), serviceEmitter.getStackTrace());
Assert.assertNull(serviceEmitter.getExceptionMessage(), serviceEmitter.getExceptionMessage());
Assert.assertNull(serviceEmitter.getExceptionClass());
}
use of org.apache.druid.indexing.kafka.KafkaIndexTask in project druid by druid-io.
the class KafkaSupervisorTest method testIsTaskCurrent.
@Test
public void testIsTaskCurrent() {
DateTime minMessageTime = DateTimes.nowUtc();
DateTime maxMessageTime = DateTimes.nowUtc().plus(10000);
KafkaSupervisor supervisor = getSupervisor(2, 1, true, "PT1H", new Period("P1D"), new Period("P1D"), false, kafkaHost, dataSchema, new KafkaSupervisorTuningConfig(null, 1000, null, null, 50000, null, new Period("P1Y"), new File("/test"), null, null, null, false, null, false, null, numThreads, TEST_CHAT_THREADS, TEST_CHAT_RETRIES, TEST_HTTP_TIMEOUT, TEST_SHUTDOWN_TIMEOUT, null, null, null, null, null));
supervisor.addTaskGroupToActivelyReadingTaskGroup(42, ImmutableMap.of(0, 0L, 2, 0L), Optional.of(minMessageTime), Optional.of(maxMessageTime), ImmutableSet.of("id1", "id2", "id3", "id4"), ImmutableSet.of());
DataSchema modifiedDataSchema = getDataSchema("some other datasource");
KafkaSupervisorTuningConfig modifiedTuningConfig = new KafkaSupervisorTuningConfig(null, // This is different
42, null, null, 50000, null, new Period("P1Y"), new File("/test"), null, null, null, false, null, null, null, numThreads, TEST_CHAT_THREADS, TEST_CHAT_RETRIES, TEST_HTTP_TIMEOUT, TEST_SHUTDOWN_TIMEOUT, null, null, null, null, null);
KafkaIndexTask taskFromStorage = createKafkaIndexTask("id1", 0, new SeekableStreamStartSequenceNumbers<>("topic", ImmutableMap.of(0, 0L, 2, 0L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>("topic", ImmutableMap.of(0, Long.MAX_VALUE, 2, Long.MAX_VALUE)), minMessageTime, maxMessageTime, dataSchema, supervisor.getTuningConfig());
KafkaIndexTask taskFromStorageMismatchedDataSchema = createKafkaIndexTask("id2", 0, new SeekableStreamStartSequenceNumbers<>("topic", ImmutableMap.of(0, 0L, 2, 0L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>("topic", ImmutableMap.of(0, Long.MAX_VALUE, 2, Long.MAX_VALUE)), minMessageTime, maxMessageTime, modifiedDataSchema, supervisor.getTuningConfig());
KafkaIndexTask taskFromStorageMismatchedTuningConfig = createKafkaIndexTask("id3", 0, new SeekableStreamStartSequenceNumbers<>("topic", ImmutableMap.of(0, 0L, 2, 0L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>("topic", ImmutableMap.of(0, Long.MAX_VALUE, 2, Long.MAX_VALUE)), minMessageTime, maxMessageTime, dataSchema, modifiedTuningConfig);
KafkaIndexTask taskFromStorageMismatchedPartitionsWithTaskGroup = createKafkaIndexTask("id4", 0, new SeekableStreamStartSequenceNumbers<>("topic", ImmutableMap.of(0, 0L, 2, 6L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>("topic", ImmutableMap.of(0, Long.MAX_VALUE, 2, Long.MAX_VALUE)), minMessageTime, maxMessageTime, dataSchema, supervisor.getTuningConfig());
EasyMock.expect(taskStorage.getTask("id1")).andReturn(Optional.of(taskFromStorage)).once();
EasyMock.expect(taskStorage.getTask("id2")).andReturn(Optional.of(taskFromStorageMismatchedDataSchema)).once();
EasyMock.expect(taskStorage.getTask("id3")).andReturn(Optional.of(taskFromStorageMismatchedTuningConfig)).once();
EasyMock.expect(taskStorage.getTask("id4")).andReturn(Optional.of(taskFromStorageMismatchedPartitionsWithTaskGroup)).once();
replayAll();
Assert.assertTrue(supervisor.isTaskCurrent(42, "id1"));
Assert.assertFalse(supervisor.isTaskCurrent(42, "id2"));
Assert.assertFalse(supervisor.isTaskCurrent(42, "id3"));
Assert.assertFalse(supervisor.isTaskCurrent(42, "id4"));
verifyAll();
}
use of org.apache.druid.indexing.kafka.KafkaIndexTask in project druid by druid-io.
the class KafkaSupervisorTest method testCheckpointForUnknownTaskGroup.
@Test(timeout = 60_000L)
public void testCheckpointForUnknownTaskGroup() throws InterruptedException {
supervisor = getTestableSupervisor(2, 1, true, "PT1S", null, null);
final KafkaSupervisorTuningConfig tuningConfig = supervisor.getTuningConfig();
// not adding any events
final KafkaIndexTask id1 = createKafkaIndexTask("id1", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 0L, 1, 0L, 2, 0L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, Long.MAX_VALUE, 1, Long.MAX_VALUE, 2, Long.MAX_VALUE)), null, null, tuningConfig);
final Task id2 = createKafkaIndexTask("id2", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 10L, 1, 20L, 2, 30L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, Long.MAX_VALUE, 1, Long.MAX_VALUE, 2, Long.MAX_VALUE)), null, null, tuningConfig);
final Task id3 = createKafkaIndexTask("id3", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 10L, 1, 20L, 2, 30L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, Long.MAX_VALUE, 1, Long.MAX_VALUE, 2, Long.MAX_VALUE)), null, null, tuningConfig);
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of(id1, id2, id3)).anyTimes();
EasyMock.expect(taskStorage.getStatus("id1")).andReturn(Optional.of(TaskStatus.running("id1"))).anyTimes();
EasyMock.expect(taskStorage.getStatus("id2")).andReturn(Optional.of(TaskStatus.running("id2"))).anyTimes();
EasyMock.expect(taskStorage.getStatus("id3")).andReturn(Optional.of(TaskStatus.running("id3"))).anyTimes();
EasyMock.expect(taskStorage.getTask("id1")).andReturn(Optional.of(id1)).anyTimes();
EasyMock.expect(taskStorage.getTask("id2")).andReturn(Optional.of(id2)).anyTimes();
EasyMock.expect(taskStorage.getTask("id3")).andReturn(Optional.of(id3)).anyTimes();
EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
replayAll();
supervisor.start();
supervisor.checkpoint(0, new KafkaDataSourceMetadata(new SeekableStreamStartSequenceNumbers<>(topic, Collections.emptyMap(), ImmutableSet.of())));
while (supervisor.getNoticesQueueSize() > 0) {
Thread.sleep(100);
}
verifyAll();
while (serviceEmitter.getStackTrace() == null) {
Thread.sleep(100);
}
Assert.assertTrue(serviceEmitter.getStackTrace().startsWith("org.apache.druid.java.util.common.ISE: Cannot find"));
Assert.assertEquals("Cannot find taskGroup [0] among all activelyReadingTaskGroups [{}]", serviceEmitter.getExceptionMessage());
Assert.assertEquals(ISE.class, serviceEmitter.getExceptionClass());
}
Aggregations