Use of org.apache.druid.indexing.kafka.KafkaIndexTask in the project druid by druid-io.
From the class KafkaSupervisorTest, method testKillUnresponsiveTasksWhileSettingEndOffsets.
/**
 * Checks that tasks whose "set end offsets" call fails are shut down and that new tasks are queued.
 *
 * <p>The first supervisor run is expected to submit four tasks. For the second run those tasks are
 * reported as active and running, the "sequenceName-0" tasks are paused, their
 * {@code setEndOffsetsAsync} calls return failed futures, and the test expects two
 * {@code taskQueue.shutdown} calls plus two newly queued tasks whose start offsets for partitions
 * 0 and 2 are 0.
 */
@Test
public void testKillUnresponsiveTasksWhileSettingEndOffsets() throws Exception {
final TaskLocation location = new TaskLocation("testHost", 1234, -1);
// Phase 1: start the supervisor with no active tasks; four taskQueue.add calls are expected and captured.
supervisor = getTestableSupervisor(2, 2, true, "PT1M", null, null);
addSomeEvents(100);
Capture<Task> captured = Capture.newInstance(CaptureType.ALL);
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true).times(4);
taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
replayAll();
supervisor.start();
supervisor.runInternal();
verifyAll();
List<Task> tasks = captured.getValues();
// Wrap every captured task in a work item so the task runner mock can report it as running below.
// NOTE(review): the raw Collection looks deliberate so it can be handed to andReturn(...) for
// getRunningTasks() — confirm before adding a type argument.
Collection workItems = new ArrayList<>();
for (Task task : tasks) {
workItems.add(new TestTaskRunnerWorkItem(task, null, location));
}
// Phase 2: discard the earlier expectations on these four mocks and record new ones.
EasyMock.reset(taskStorage, taskRunner, taskClient, taskQueue);
TreeMap<Integer, Map<Integer, Long>> checkpoints1 = new TreeMap<>();
checkpoints1.put(0, ImmutableMap.of(0, 0L, 2, 0L));
TreeMap<Integer, Map<Integer, Long>> checkpoints2 = new TreeMap<>();
checkpoints2.put(0, ImmutableMap.of(1, 0L));
EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-0"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints1)).times(2);
EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-1"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints2)).times(2);
// Fresh capture so that only tasks queued during the second run are inspected at the end.
captured = Capture.newInstance(CaptureType.ALL);
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(tasks).anyTimes();
for (Task task : tasks) {
EasyMock.expect(taskStorage.getStatus(task.getId())).andReturn(Optional.of(TaskStatus.running(task.getId()))).anyTimes();
EasyMock.expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
}
EasyMock.expect(taskRunner.getRunningTasks()).andReturn(workItems).anyTimes();
EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(Status.READING)).anyTimes();
// The first "sequenceName-0" start time is two minutes in the past, the second is "now"; presumably this
// puts the group past the "PT1M" value passed to getTestableSupervisor — TODO confirm.
EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.contains("sequenceName-0"))).andReturn(Futures.immediateFuture(DateTimes.nowUtc().minusMinutes(2))).andReturn(Futures.immediateFuture(DateTimes.nowUtc()));
EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.contains("sequenceName-1"))).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).times(2);
// The two pause responses differ; the end offsets expected on the next line equal the per-partition
// maximum of the two maps.
EasyMock.expect(taskClient.pauseAsync(EasyMock.contains("sequenceName-0"))).andReturn(Futures.immediateFuture(ImmutableMap.of(0, 10L, 1, 20L, 2, 30L))).andReturn(Futures.immediateFuture(ImmutableMap.of(0, 10L, 1, 15L, 2, 35L)));
// Both set-end-offsets calls return a failed future.
EasyMock.expect(taskClient.setEndOffsetsAsync(EasyMock.contains("sequenceName-0"), EasyMock.eq(ImmutableMap.of(0, 10L, 1, 20L, 2, 35L)), EasyMock.eq(true))).andReturn(Futures.immediateFailedFuture(new RuntimeException())).times(2);
// Each failing task is expected to be shut down through the task queue with this exact message template.
taskQueue.shutdown(EasyMock.contains("sequenceName-0"), EasyMock.eq("Task [%s] failed to respond to [set end offsets] in a timely manner, killing task"), EasyMock.contains("sequenceName-0"));
EasyMock.expectLastCall().times(2);
// Two new tasks are expected on the queue during this run.
EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true).times(2);
EasyMock.replay(taskStorage, taskRunner, taskClient, taskQueue);
supervisor.runInternal();
verifyAll();
// The newly queued tasks must start from offset 0 for partitions 0 and 2.
for (Task task : captured.getValues()) {
KafkaIndexTaskIOConfig taskConfig = ((KafkaIndexTask) task).getIOConfig();
Assert.assertEquals(0L, (long) taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(0));
Assert.assertEquals(0L, (long) taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(2));
}
}
Use of org.apache.druid.indexing.kafka.KafkaIndexTask in the project druid by druid-io.
From the class KafkaSupervisorTest, method testLatestOffset.
/**
 * Checks that starting offsets are taken from the partition high water marks in Kafka.
 *
 * <p>After {@code addSomeEvents(1100)}, the single submitted task is expected to start at offset 1101
 * for each of partitions 0, 1 and 2.
 */
@Test
public void testLatestOffset() throws Exception {
  supervisor = getTestableSupervisor(1, 1, false, "PT1H", null, null);
  addSomeEvents(1100);

  final Capture<KafkaIndexTask> submitted = Capture.newInstance();
  EasyMock.expect(taskMaster.getTaskQueue())
      .andReturn(Optional.of(taskQueue))
      .anyTimes();
  EasyMock.expect(taskMaster.getTaskRunner())
      .andReturn(Optional.absent())
      .anyTimes();
  EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE))
      .andReturn(ImmutableList.of())
      .anyTimes();
  EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE))
      .andReturn(new KafkaDataSourceMetadata(null))
      .anyTimes();
  // Exactly one task is expected to be queued.
  EasyMock.expect(taskQueue.add(EasyMock.capture(submitted))).andReturn(true);
  replayAll();

  supervisor.start();
  supervisor.runInternal();
  verifyAll();

  final Map<Integer, Long> startOffsets =
      submitted.getValue().getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap();
  // Partitions are checked in ascending order: 0, 1, 2.
  for (int partition = 0; partition < 3; partition++) {
    Assert.assertEquals(1101L, startOffsets.get(partition).longValue());
  }
}
Use of org.apache.druid.indexing.kafka.KafkaIndexTask in the project druid by druid-io.
From the class KafkaSupervisorTest, method testRequeueTaskWhenFailed.
/**
 * Checks that running tasks are left alone and that a failed task is replaced by a newly queued one.
 *
 * <p>Three supervisor runs are exercised: the initial run, which is expected to queue four tasks; a
 * second run in which task storage reports all four as running; and a third run in which the last
 * captured task is reported as failed and exactly one new task is expected on the queue.
 */
@Test
public void testRequeueTaskWhenFailed() throws Exception {
  supervisor = getTestableSupervisor(2, 2, true, "PT1H", null, null);
  addSomeEvents(1);

  final Capture<Task> initialCapture = Capture.newInstance(CaptureType.ALL);
  EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
  EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
  EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
  EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE))
      .andReturn(ImmutableList.of())
      .anyTimes();
  EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString()))
      .andReturn(Futures.immediateFuture(Status.NOT_STARTED))
      .anyTimes();
  EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString()))
      .andReturn(Futures.immediateFuture(DateTimes.nowUtc()))
      .anyTimes();
  EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE))
      .andReturn(new KafkaDataSourceMetadata(null))
      .anyTimes();
  EasyMock.expect(taskQueue.add(EasyMock.capture(initialCapture))).andReturn(true).times(4);

  final TreeMap<Integer, Map<Integer, Long>> checkpointsForSequence0 = new TreeMap<>();
  checkpointsForSequence0.put(0, ImmutableMap.of(0, 0L, 2, 0L));
  final TreeMap<Integer, Map<Integer, Long>> checkpointsForSequence1 = new TreeMap<>();
  checkpointsForSequence1.put(0, ImmutableMap.of(1, 0L));
  EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-0"), EasyMock.anyBoolean()))
      .andReturn(Futures.immediateFuture(checkpointsForSequence0))
      .anyTimes();
  EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-1"), EasyMock.anyBoolean()))
      .andReturn(Futures.immediateFuture(checkpointsForSequence1))
      .anyTimes();

  taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
  replayAll();

  supervisor.start();
  supervisor.runInternal();
  verifyAll();

  final List<Task> createdTasks = initialCapture.getValues();

  // Second run: every created task is reported as running, so the main loop should only check their
  // status and do nothing else.
  EasyMock.reset(taskStorage);
  EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(createdTasks).anyTimes();
  for (Task running : createdTasks) {
    final String runningId = running.getId();
    EasyMock.expect(taskStorage.getStatus(runningId))
        .andReturn(Optional.of(TaskStatus.running(runningId)))
        .anyTimes();
    EasyMock.expect(taskStorage.getTask(runningId)).andReturn(Optional.of(running)).anyTimes();
  }
  EasyMock.replay(taskStorage);

  supervisor.runInternal();
  verifyAll();

  // Third run: one task fails, which should cause a new task with the same parameters to be re-queued.
  final Capture<Task> replacementCapture = Capture.newInstance();
  final List<Task> survivingTasks = createdTasks.subList(0, 3);
  final KafkaIndexTask failedTask = (KafkaIndexTask) createdTasks.get(3);
  final String failedId = failedTask.getId();

  EasyMock.reset(taskStorage, taskQueue);
  EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(survivingTasks).anyTimes();
  for (Task survivor : survivingTasks) {
    final String survivorId = survivor.getId();
    EasyMock.expect(taskStorage.getStatus(survivorId))
        .andReturn(Optional.of(TaskStatus.running(survivorId)))
        .anyTimes();
    EasyMock.expect(taskStorage.getTask(survivorId)).andReturn(Optional.of(survivor)).anyTimes();
  }
  EasyMock.expect(taskStorage.getStatus(failedId))
      .andReturn(Optional.of(TaskStatus.failure(failedId, "Dummy task status failure err message")));
  EasyMock.expect(taskStorage.getTask(failedId)).andReturn(Optional.of(failedTask)).anyTimes();
  EasyMock.expect(taskQueue.add(EasyMock.capture(replacementCapture))).andReturn(true);
  EasyMock.replay(taskStorage, taskQueue);

  supervisor.runInternal();
  verifyAll();

  // The replacement has a new id but keeps the failed task's base sequence name.
  final Task replacement = replacementCapture.getValue();
  Assert.assertNotEquals(failedId, replacement.getId());
  Assert.assertEquals(
      failedTask.getIOConfig().getBaseSequenceName(),
      ((KafkaIndexTask) replacement).getIOConfig().getBaseSequenceName()
  );
}
Use of org.apache.druid.indexing.kafka.KafkaIndexTask in the project druid by druid-io.
From the class KafkaSupervisorTest, method testNoInitialStateWithAutoscaler.
/**
 * Checks supervisor start-up with a lag-based autoscaler configured and no stored metadata.
 *
 * <p>The test asserts that the supervisor's task count is 1 before the autoscaler is started and 2
 * after one supervisor run plus a one-second wait, and that the single queued task carries the
 * expected data schema, tuning config, consumer properties, sequence name and start/end offsets.
 *
 * <p>The autoscaler is reset and stopped in a {@code finally} block so that a failing assertion does
 * not leave a started autoscaler behind.
 */
@Test
public void testNoInitialStateWithAutoscaler() throws Exception {
  // Factory that checks the chat-handler settings passed by the supervisor and hands back the shared mock.
  KafkaIndexTaskClientFactory taskClientFactory = new KafkaIndexTaskClientFactory(null, null) {
    @Override
    public KafkaIndexTaskClient build(
        TaskInfoProvider taskInfoProvider,
        String dataSource,
        int numThreads,
        Duration httpTimeout,
        long numRetries
    ) {
      Assert.assertEquals(TEST_CHAT_THREADS, numThreads);
      Assert.assertEquals(TEST_HTTP_TIMEOUT.toStandardDuration(), httpTimeout);
      Assert.assertEquals(TEST_CHAT_RETRIES, numRetries);
      return taskClient;
    }
  };

  // Raw autoscaler settings; converted to a LagBasedAutoScalerConfig through OBJECT_MAPPER below.
  HashMap<String, Object> autoScalerConfig = new HashMap<>();
  autoScalerConfig.put("enableTaskAutoScaler", true);
  autoScalerConfig.put("lagCollectionIntervalMillis", 500);
  autoScalerConfig.put("lagCollectionRangeMillis", 500);
  autoScalerConfig.put("scaleOutThreshold", 0);
  autoScalerConfig.put("triggerScaleOutFractionThreshold", 0.0);
  autoScalerConfig.put("scaleInThreshold", 1000000);
  autoScalerConfig.put("triggerScaleInFractionThreshold", 0.8);
  autoScalerConfig.put("scaleActionStartDelayMillis", 0);
  autoScalerConfig.put("scaleActionPeriodMillis", 100);
  autoScalerConfig.put("taskCountMax", 2);
  autoScalerConfig.put("taskCountMin", 1);
  autoScalerConfig.put("scaleInStep", 1);
  autoScalerConfig.put("scaleOutStep", 2);
  autoScalerConfig.put("minTriggerScaleActionFrequencyMillis", 1200000);

  final Map<String, Object> consumerProperties = KafkaConsumerConfigs.getConsumerProperties();
  consumerProperties.put("myCustomKey", "myCustomValue");
  consumerProperties.put("bootstrap.servers", kafkaHost);

  KafkaSupervisorIOConfig kafkaSupervisorIOConfig = new KafkaSupervisorIOConfig(
      topic,
      INPUT_FORMAT,
      1,
      1,
      new Period("PT1H"),
      consumerProperties,
      OBJECT_MAPPER.convertValue(autoScalerConfig, LagBasedAutoScalerConfig.class),
      KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS,
      new Period("P1D"),
      new Period("PT30S"),
      true,
      new Period("PT30M"),
      null,
      null,
      null
  );
  final KafkaSupervisorTuningConfig tuningConfigOri = new KafkaSupervisorTuningConfig(
      null,
      1000,
      null,
      null,
      50000,
      null,
      new Period("P1Y"),
      new File("/test"),
      null,
      null,
      null,
      false,
      null,
      false,
      null,
      numThreads,
      TEST_CHAT_THREADS,
      TEST_CHAT_RETRIES,
      TEST_HTTP_TIMEOUT,
      TEST_SHUTDOWN_TIMEOUT,
      null,
      null,
      null,
      null,
      null
  );

  EasyMock.expect(ingestionSchema.getIOConfig()).andReturn(kafkaSupervisorIOConfig).anyTimes();
  EasyMock.expect(ingestionSchema.getDataSchema()).andReturn(dataSchema).anyTimes();
  EasyMock.expect(ingestionSchema.getTuningConfig()).andReturn(tuningConfigOri).anyTimes();
  EasyMock.replay(ingestionSchema);

  SeekableStreamSupervisorSpec testableSupervisorSpec = new KafkaSupervisorSpec(
      ingestionSchema,
      dataSchema,
      tuningConfigOri,
      kafkaSupervisorIOConfig,
      null,
      false,
      taskStorage,
      taskMaster,
      indexerMetadataStorageCoordinator,
      taskClientFactory,
      OBJECT_MAPPER,
      new NoopServiceEmitter(),
      new DruidMonitorSchedulerConfig(),
      rowIngestionMetersFactory,
      new SupervisorStateManagerConfig()
  );
  supervisor = new TestableKafkaSupervisor(
      taskStorage,
      taskMaster,
      indexerMetadataStorageCoordinator,
      taskClientFactory,
      OBJECT_MAPPER,
      (KafkaSupervisorSpec) testableSupervisorSpec,
      rowIngestionMetersFactory
  );
  SupervisorTaskAutoScaler autoscaler = testableSupervisorSpec.createAutoscaler(supervisor);
  final KafkaSupervisorTuningConfig tuningConfig = supervisor.getTuningConfig();

  addSomeEvents(1);

  Capture<KafkaIndexTask> captured = Capture.newInstance();
  EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
  EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
  EasyMock.expect(taskMaster.getSupervisorManager()).andReturn(Optional.absent()).anyTimes();
  EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
  EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE))
      .andReturn(new KafkaDataSourceMetadata(null))
      .anyTimes();
  EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true);
  taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
  replayAll();

  supervisor.start();
  int taskCountBeforeScale = supervisor.getIoConfig().getTaskCount();
  Assert.assertEquals(1, taskCountBeforeScale);

  autoscaler.start();
  try {
    supervisor.runInternal();
    // Wait before checking the task count; presumably this gives the autoscaler (configured above with
    // a 500 ms lag collection interval and a 100 ms scale action period) time to act — TODO confirm.
    Thread.sleep(1000);
    verifyAll();

    int taskCountAfterScale = supervisor.getIoConfig().getTaskCount();
    Assert.assertEquals(2, taskCountAfterScale);

    KafkaIndexTask task = captured.getValue();
    Assert.assertEquals(KafkaSupervisorTest.dataSchema, task.getDataSchema());
    Assert.assertEquals(tuningConfig.convertToTaskTuningConfig(), task.getTuningConfig());

    KafkaIndexTaskIOConfig taskConfig = task.getIOConfig();
    Assert.assertEquals(kafkaHost, taskConfig.getConsumerProperties().get("bootstrap.servers"));
    Assert.assertEquals("myCustomValue", taskConfig.getConsumerProperties().get("myCustomKey"));
    Assert.assertEquals("sequenceName-0", taskConfig.getBaseSequenceName());
    Assert.assertTrue("isUseTransaction", taskConfig.isUseTransaction());
    Assert.assertFalse("minimumMessageTime", taskConfig.getMinimumMessageTime().isPresent());
    Assert.assertFalse("maximumMessageTime", taskConfig.getMaximumMessageTime().isPresent());

    Assert.assertEquals(topic, taskConfig.getStartSequenceNumbers().getStream());
    Assert.assertEquals(0L, (long) taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(0));
    Assert.assertEquals(0L, (long) taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(1));
    Assert.assertEquals(0L, (long) taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(2));

    Assert.assertEquals(topic, taskConfig.getEndSequenceNumbers().getStream());
    Assert.assertEquals(Long.MAX_VALUE, (long) taskConfig.getEndSequenceNumbers().getPartitionSequenceNumberMap().get(0));
    Assert.assertEquals(Long.MAX_VALUE, (long) taskConfig.getEndSequenceNumbers().getPartitionSequenceNumberMap().get(1));
    Assert.assertEquals(Long.MAX_VALUE, (long) taskConfig.getEndSequenceNumbers().getPartitionSequenceNumberMap().get(2));
  }
  finally {
    // Always reset and stop the autoscaler started above, even when verification or an assertion fails.
    autoscaler.reset();
    autoscaler.stop();
  }
}
Use of org.apache.druid.indexing.kafka.KafkaIndexTask in the project druid by druid-io.
From the class KafkaSupervisorTest, method testReplicas.
/**
 * Checks that the supervisor queues two tasks that each cover the same three partitions.
 *
 * <p>Both captured tasks must have three start and three end sequence numbers, with start offset 0
 * for partitions 0, 1 and 2.
 */
@Test
public void testReplicas() throws Exception {
  supervisor = getTestableSupervisor(2, 1, true, "PT1H", null, null);
  addSomeEvents(1);

  final Capture<KafkaIndexTask> queuedTasks = Capture.newInstance(CaptureType.ALL);
  EasyMock.expect(taskMaster.getTaskQueue())
      .andReturn(Optional.of(taskQueue))
      .anyTimes();
  EasyMock.expect(taskMaster.getTaskRunner())
      .andReturn(Optional.absent())
      .anyTimes();
  EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE))
      .andReturn(ImmutableList.of())
      .anyTimes();
  EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE))
      .andReturn(new KafkaDataSourceMetadata(null))
      .anyTimes();
  EasyMock.expect(taskQueue.add(EasyMock.capture(queuedTasks))).andReturn(true).times(2);
  replayAll();

  supervisor.start();
  supervisor.runInternal();
  verifyAll();

  // Inspect the first and then the second captured task with the same set of checks.
  for (int index = 0; index < 2; index++) {
    final KafkaIndexTask replica = queuedTasks.getValues().get(index);
    final Map<Integer, Long> startOffsets =
        replica.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap();
    Assert.assertEquals(3, startOffsets.size());
    Assert.assertEquals(3, replica.getIOConfig().getEndSequenceNumbers().getPartitionSequenceNumberMap().size());
    for (int partition = 0; partition < 3; partition++) {
      Assert.assertEquals(0L, startOffsets.get(partition).longValue());
    }
  }
}
Aggregations