use of io.druid.indexing.kafka.KafkaIndexTask in project druid by druid-io.
the class KafkaSupervisorTest method testLatestOffset.
@Test
public /**
* Test generating the starting offsets from the partition high water marks in Kafka.
*/
void testLatestOffset() throws Exception {
supervisor = getSupervisor(1, 1, false, "PT1H", null);
addSomeEvents(1100);
Capture<KafkaIndexTask> captured = Capture.newInstance();
expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
expect(taskMaster.getTaskRunner()).andReturn(Optional.<TaskRunner>absent()).anyTimes();
expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.<Task>of()).anyTimes();
expect(indexerMetadataStorageCoordinator.getDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
expect(taskQueue.add(capture(captured))).andReturn(true);
replayAll();
supervisor.start();
supervisor.runInternal();
verifyAll();
KafkaIndexTask task = captured.getValue();
Assert.assertEquals(1100L, (long) task.getIOConfig().getStartPartitions().getPartitionOffsetMap().get(0));
Assert.assertEquals(1100L, (long) task.getIOConfig().getStartPartitions().getPartitionOffsetMap().get(1));
Assert.assertEquals(1100L, (long) task.getIOConfig().getStartPartitions().getPartitionOffsetMap().get(2));
}
use of io.druid.indexing.kafka.KafkaIndexTask in project druid by druid-io.
the class KafkaSupervisor method discoverTasks.
private void discoverTasks() throws ExecutionException, InterruptedException, TimeoutException {
int taskCount = 0;
List<String> futureTaskIds = Lists.newArrayList();
List<ListenableFuture<Boolean>> futures = Lists.newArrayList();
List<Task> tasks = taskStorage.getActiveTasks();
for (Task task : tasks) {
if (!(task instanceof KafkaIndexTask) || !dataSource.equals(task.getDataSource())) {
continue;
}
taskCount++;
final KafkaIndexTask kafkaTask = (KafkaIndexTask) task;
final String taskId = task.getId();
// Determine which task group this task belongs to based on one of the partitions handled by this task. If we
// later determine that this task is actively reading, we will make sure that it matches our current partition
// allocation (getTaskGroupIdForPartition(partition) should return the same value for every partition being read
// by this task) and kill it if it is not compatible. If the task is instead found to be in the publishing
// state, we will permit it to complete even if it doesn't match our current partition allocation to support
// seamless schema migration.
Iterator<Integer> it = kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap().keySet().iterator();
final Integer taskGroupId = (it.hasNext() ? getTaskGroupIdForPartition(it.next()) : null);
if (taskGroupId != null) {
// check to see if we already know about this task, either in [taskGroups] or in [pendingCompletionTaskGroups]
// and if not add it to taskGroups or pendingCompletionTaskGroups (if status = PUBLISHING)
TaskGroup taskGroup = taskGroups.get(taskGroupId);
if (!isTaskInPendingCompletionGroups(taskId) && (taskGroup == null || !taskGroup.tasks.containsKey(taskId))) {
futureTaskIds.add(taskId);
futures.add(Futures.transform(taskClient.getStatusAsync(taskId), new Function<KafkaIndexTask.Status, Boolean>() {
@Override
public Boolean apply(KafkaIndexTask.Status status) {
if (status == KafkaIndexTask.Status.PUBLISHING) {
addDiscoveredTaskToPendingCompletionTaskGroups(taskGroupId, taskId, kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap());
// update partitionGroups with the publishing task's offsets (if they are greater than what is
// existing) so that the next tasks will start reading from where this task left off
Map<Integer, Long> publishingTaskCurrentOffsets = taskClient.getCurrentOffsets(taskId, true);
for (Map.Entry<Integer, Long> entry : publishingTaskCurrentOffsets.entrySet()) {
Integer partition = entry.getKey();
Long offset = entry.getValue();
ConcurrentHashMap<Integer, Long> partitionOffsets = partitionGroups.get(getTaskGroupIdForPartition(partition));
boolean succeeded;
do {
succeeded = true;
Long previousOffset = partitionOffsets.putIfAbsent(partition, offset);
if (previousOffset != null && previousOffset < offset) {
succeeded = partitionOffsets.replace(partition, previousOffset, offset);
}
} while (!succeeded);
}
} else {
for (Integer partition : kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap().keySet()) {
if (!taskGroupId.equals(getTaskGroupIdForPartition(partition))) {
log.warn("Stopping task [%s] which does not match the expected partition allocation", taskId);
try {
stopTask(taskId, false).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
} catch (InterruptedException | ExecutionException | TimeoutException e) {
log.warn(e, "Exception while stopping task");
}
return false;
}
}
if (taskGroups.putIfAbsent(taskGroupId, new TaskGroup(ImmutableMap.copyOf(kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap()), kafkaTask.getIOConfig().getMinimumMessageTime())) == null) {
log.debug("Created new task group [%d]", taskGroupId);
}
if (!isTaskCurrent(taskGroupId, taskId)) {
log.info("Stopping task [%s] which does not match the expected parameters and ingestion spec", taskId);
try {
stopTask(taskId, false).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
} catch (InterruptedException | ExecutionException | TimeoutException e) {
log.warn(e, "Exception while stopping task");
}
return false;
} else {
taskGroups.get(taskGroupId).tasks.putIfAbsent(taskId, new TaskData());
}
}
return true;
}
}, workerExec));
}
}
}
List<Boolean> results = Futures.successfulAsList(futures).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
for (int i = 0; i < results.size(); i++) {
if (results.get(i) == null) {
String taskId = futureTaskIds.get(i);
log.warn("Task [%s] failed to return status, killing task", taskId);
killTask(taskId);
}
}
log.debug("Found [%d] Kafka indexing tasks for dataSource [%s]", taskCount, dataSource);
}
use of io.druid.indexing.kafka.KafkaIndexTask in project druid by druid-io.
the class KafkaSupervisorTest method testDatasourceMetadata.
@Test
public /**
* Test generating the starting offsets from the partition data stored in druid_dataSource which contains the
* offsets of the last built segments.
*/
void testDatasourceMetadata() throws Exception {
supervisor = getSupervisor(1, 1, true, "PT1H", null);
addSomeEvents(100);
Capture<KafkaIndexTask> captured = Capture.newInstance();
expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
expect(taskMaster.getTaskRunner()).andReturn(Optional.<TaskRunner>absent()).anyTimes();
expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.<Task>of()).anyTimes();
expect(indexerMetadataStorageCoordinator.getDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(new KafkaPartitions(KAFKA_TOPIC, ImmutableMap.of(0, 10L, 1, 20L, 2, 30L)))).anyTimes();
expect(taskQueue.add(capture(captured))).andReturn(true);
replayAll();
supervisor.start();
supervisor.runInternal();
verifyAll();
KafkaIndexTask task = captured.getValue();
KafkaIOConfig taskConfig = task.getIOConfig();
Assert.assertEquals(String.format("sequenceName-0", DATASOURCE), taskConfig.getBaseSequenceName());
Assert.assertEquals(10L, (long) taskConfig.getStartPartitions().getPartitionOffsetMap().get(0));
Assert.assertEquals(20L, (long) taskConfig.getStartPartitions().getPartitionOffsetMap().get(1));
Assert.assertEquals(30L, (long) taskConfig.getStartPartitions().getPartitionOffsetMap().get(2));
}
use of io.druid.indexing.kafka.KafkaIndexTask in project druid by druid-io.
the class KafkaSupervisorTest method testBeginPublishAndQueueNextTasks.
@Test
public void testBeginPublishAndQueueNextTasks() throws Exception {
final TaskLocation location = new TaskLocation("testHost", 1234);
supervisor = getSupervisor(2, 2, true, "PT1M", null);
addSomeEvents(100);
Capture<Task> captured = Capture.newInstance(CaptureType.ALL);
expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
expect(taskRunner.getRunningTasks()).andReturn(Collections.EMPTY_LIST).anyTimes();
expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.<Task>of()).anyTimes();
expect(indexerMetadataStorageCoordinator.getDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
expect(taskQueue.add(capture(captured))).andReturn(true).times(4);
taskRunner.registerListener(anyObject(TaskRunnerListener.class), anyObject(Executor.class));
replayAll();
supervisor.start();
supervisor.runInternal();
verifyAll();
List<Task> tasks = captured.getValues();
Collection workItems = new ArrayList<>();
for (Task task : tasks) {
workItems.add(new TestTaskRunnerWorkItem(task.getId(), null, location));
}
reset(taskStorage, taskRunner, taskClient, taskQueue);
captured = Capture.newInstance(CaptureType.ALL);
expect(taskStorage.getActiveTasks()).andReturn(tasks).anyTimes();
for (Task task : tasks) {
expect(taskStorage.getStatus(task.getId())).andReturn(Optional.of(TaskStatus.running(task.getId()))).anyTimes();
expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
}
expect(taskRunner.getRunningTasks()).andReturn(workItems).anyTimes();
expect(taskClient.getStatusAsync(anyString())).andReturn(Futures.immediateFuture(KafkaIndexTask.Status.READING)).anyTimes();
expect(taskClient.getStartTimeAsync(EasyMock.contains("sequenceName-0"))).andReturn(Futures.immediateFuture(DateTime.now().minusMinutes(2))).andReturn(Futures.immediateFuture(DateTime.now()));
expect(taskClient.getStartTimeAsync(EasyMock.contains("sequenceName-1"))).andReturn(Futures.immediateFuture(DateTime.now())).times(2);
expect(taskClient.pauseAsync(EasyMock.contains("sequenceName-0"))).andReturn(Futures.immediateFuture((Map<Integer, Long>) ImmutableMap.of(0, 10L, 1, 20L, 2, 30L))).andReturn(Futures.immediateFuture((Map<Integer, Long>) ImmutableMap.of(0, 10L, 1, 15L, 2, 35L)));
expect(taskClient.setEndOffsetsAsync(EasyMock.contains("sequenceName-0"), EasyMock.eq(ImmutableMap.of(0, 10L, 1, 20L, 2, 35L)), EasyMock.eq(true))).andReturn(Futures.immediateFuture(true)).times(2);
expect(taskQueue.add(capture(captured))).andReturn(true).times(2);
replay(taskStorage, taskRunner, taskClient, taskQueue);
supervisor.runInternal();
verifyAll();
for (Task task : captured.getValues()) {
KafkaIndexTask kafkaIndexTask = (KafkaIndexTask) task;
Assert.assertEquals(dataSchema, kafkaIndexTask.getDataSchema());
Assert.assertEquals(KafkaTuningConfig.copyOf(tuningConfig), kafkaIndexTask.getTuningConfig());
KafkaIOConfig taskConfig = kafkaIndexTask.getIOConfig();
Assert.assertEquals("sequenceName-0", taskConfig.getBaseSequenceName());
Assert.assertTrue("isUseTransaction", taskConfig.isUseTransaction());
Assert.assertFalse("pauseAfterRead", taskConfig.isPauseAfterRead());
Assert.assertEquals(KAFKA_TOPIC, taskConfig.getStartPartitions().getTopic());
Assert.assertEquals(10L, (long) taskConfig.getStartPartitions().getPartitionOffsetMap().get(0));
Assert.assertEquals(20L, (long) taskConfig.getStartPartitions().getPartitionOffsetMap().get(1));
Assert.assertEquals(35L, (long) taskConfig.getStartPartitions().getPartitionOffsetMap().get(2));
}
}
use of io.druid.indexing.kafka.KafkaIndexTask in project druid by druid-io.
the class KafkaSupervisorTest method testMultiTask.
@Test
public void testMultiTask() throws Exception {
supervisor = getSupervisor(1, 2, true, "PT1H", null);
addSomeEvents(1);
Capture<KafkaIndexTask> captured = Capture.newInstance(CaptureType.ALL);
expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
expect(taskMaster.getTaskRunner()).andReturn(Optional.<TaskRunner>absent()).anyTimes();
expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.<Task>of()).anyTimes();
expect(indexerMetadataStorageCoordinator.getDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
expect(taskQueue.add(capture(captured))).andReturn(true).times(2);
replayAll();
supervisor.start();
supervisor.runInternal();
verifyAll();
KafkaIndexTask task1 = captured.getValues().get(0);
Assert.assertEquals(2, task1.getIOConfig().getStartPartitions().getPartitionOffsetMap().size());
Assert.assertEquals(2, task1.getIOConfig().getEndPartitions().getPartitionOffsetMap().size());
Assert.assertEquals(0L, (long) task1.getIOConfig().getStartPartitions().getPartitionOffsetMap().get(0));
Assert.assertEquals(Long.MAX_VALUE, (long) task1.getIOConfig().getEndPartitions().getPartitionOffsetMap().get(0));
Assert.assertEquals(0L, (long) task1.getIOConfig().getStartPartitions().getPartitionOffsetMap().get(2));
Assert.assertEquals(Long.MAX_VALUE, (long) task1.getIOConfig().getEndPartitions().getPartitionOffsetMap().get(2));
KafkaIndexTask task2 = captured.getValues().get(1);
Assert.assertEquals(1, task2.getIOConfig().getStartPartitions().getPartitionOffsetMap().size());
Assert.assertEquals(1, task2.getIOConfig().getEndPartitions().getPartitionOffsetMap().size());
Assert.assertEquals(0L, (long) task2.getIOConfig().getStartPartitions().getPartitionOffsetMap().get(1));
Assert.assertEquals(Long.MAX_VALUE, (long) task2.getIOConfig().getEndPartitions().getPartitionOffsetMap().get(1));
}
Aggregations