Search in sources :

Example 1 with KafkaIndexTask

use of io.druid.indexing.kafka.KafkaIndexTask in project druid by druid-io.

the class KafkaSupervisorTest method testLatestOffset.

/**
 * Test generating the starting offsets from the partition high water marks in Kafka.
 * <p>
 * After 1100 events have been added and with no offsets stored in the datasource metadata, the single task
 * handed to the task queue is expected to start at offset 1100 on each of partitions 0, 1 and 2.
 */
@Test
public void testLatestOffset() throws Exception {
    supervisor = getSupervisor(1, 1, false, "PT1H", null);
    addSomeEvents(1100);

    // Record expectations: nothing is running yet and no offsets have been persisted.
    Capture<KafkaIndexTask> taskCapture = Capture.newInstance();
    expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    expect(taskMaster.getTaskRunner()).andReturn(Optional.<TaskRunner>absent()).anyTimes();
    expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.<Task>of()).anyTimes();
    expect(indexerMetadataStorageCoordinator.getDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
    expect(taskQueue.add(capture(taskCapture))).andReturn(true);
    replayAll();

    supervisor.start();
    supervisor.runInternal();
    verifyAll();

    // Every partition of the queued task must begin at the same offset.
    KafkaIndexTask queuedTask = taskCapture.getValue();
    for (int partition = 0; partition <= 2; partition++) {
        Assert.assertEquals(1100L, queuedTask.getIOConfig().getStartPartitions().getPartitionOffsetMap().get(partition).longValue());
    }
}
Also used : RealtimeIndexTask(io.druid.indexing.common.task.RealtimeIndexTask) Task(io.druid.indexing.common.task.Task) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) KafkaDataSourceMetadata(io.druid.indexing.kafka.KafkaDataSourceMetadata) TaskRunner(io.druid.indexing.overlord.TaskRunner) Test(org.junit.Test)

Example 2 with KafkaIndexTask

use of io.druid.indexing.kafka.KafkaIndexTask in project druid by druid-io.

the class KafkaSupervisor method discoverTasks.

/**
 * Discovers Kafka indexing tasks for this supervisor's dataSource that are present in task storage but are not yet
 * tracked by the supervisor.
 * <p>
 * For every active {@link KafkaIndexTask} of this dataSource that is in neither {@code taskGroups} nor the
 * pending-completion groups, the task's status is requested asynchronously and the response is handled via
 * {@code workerExec}:
 * <ul>
 * <li>A PUBLISHING task is added to the pending-completion groups and its current offsets are merged into
 * {@code partitionGroups}, keeping the larger offset per partition.</li>
 * <li>A task in any other status is stopped if one of its partitions maps to a different task group than its
 * first partition, or if {@code isTaskCurrent} rejects it; otherwise it is registered in {@code taskGroups}.</li>
 * </ul>
 * Tasks whose status future produced no result are killed.
 *
 * @throws ExecutionException   if waiting on the combined status futures fails
 * @throws InterruptedException if interrupted while waiting on the combined status futures
 * @throws TimeoutException     if the combined status futures do not complete within
 *                              {@code futureTimeoutInSeconds} seconds
 */
private void discoverTasks() throws ExecutionException, InterruptedException, TimeoutException {
    // Number of active Kafka indexing tasks seen for this dataSource (only used for the debug log at the end).
    int taskCount = 0;
    // futureTaskIds and futures are kept index-aligned so a failed future can be mapped back to its task id.
    List<String> futureTaskIds = Lists.newArrayList();
    List<ListenableFuture<Boolean>> futures = Lists.newArrayList();
    List<Task> tasks = taskStorage.getActiveTasks();
    for (Task task : tasks) {
        // Ignore tasks of other types and tasks that belong to a different dataSource.
        if (!(task instanceof KafkaIndexTask) || !dataSource.equals(task.getDataSource())) {
            continue;
        }
        taskCount++;
        final KafkaIndexTask kafkaTask = (KafkaIndexTask) task;
        final String taskId = task.getId();
        // Determine which task group this task belongs to based on one of the partitions handled by this task. If we
        // later determine that this task is actively reading, we will make sure that it matches our current partition
        // allocation (getTaskGroupIdForPartition(partition) should return the same value for every partition being read
        // by this task) and kill it if it is not compatible. If the task is instead found to be in the publishing
        // state, we will permit it to complete even if it doesn't match our current partition allocation to support
        // seamless schema migration.
        Iterator<Integer> it = kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap().keySet().iterator();
        // A task with no start partitions yields a null group id and is skipped below.
        final Integer taskGroupId = (it.hasNext() ? getTaskGroupIdForPartition(it.next()) : null);
        if (taskGroupId != null) {
            // check to see if we already know about this task, either in [taskGroups] or in [pendingCompletionTaskGroups]
            // and if not add it to taskGroups or pendingCompletionTaskGroups (if status = PUBLISHING)
            TaskGroup taskGroup = taskGroups.get(taskGroupId);
            if (!isTaskInPendingCompletionGroups(taskId) && (taskGroup == null || !taskGroup.tasks.containsKey(taskId))) {
                futureTaskIds.add(taskId);
                futures.add(Futures.transform(taskClient.getStatusAsync(taskId), new Function<KafkaIndexTask.Status, Boolean>() {

                    // Returns true when the task was registered and false when it was asked to stop.
                    @Override
                    public Boolean apply(KafkaIndexTask.Status status) {
                        if (status == KafkaIndexTask.Status.PUBLISHING) {
                            addDiscoveredTaskToPendingCompletionTaskGroups(taskGroupId, taskId, kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap());
                            // update partitionGroups with the publishing task's offsets (if they are greater than what is
                            // existing) so that the next tasks will start reading from where this task left off
                            Map<Integer, Long> publishingTaskCurrentOffsets = taskClient.getCurrentOffsets(taskId, true);
                            for (Map.Entry<Integer, Long> entry : publishingTaskCurrentOffsets.entrySet()) {
                                Integer partition = entry.getKey();
                                Long offset = entry.getValue();
                                // NOTE(review): assumes partitionGroups already holds an entry for this group id;
                                // a missing entry would surface as a NullPointerException below - confirm.
                                ConcurrentHashMap<Integer, Long> partitionOffsets = partitionGroups.get(getTaskGroupIdForPartition(partition));
                                boolean succeeded;
                                // Compare-and-set retry loop: insert the offset if the partition has none; if a smaller
                                // offset is stored, replace it only if it is still the value we observed, and retry
                                // when another thread changed it in between.
                                do {
                                    succeeded = true;
                                    Long previousOffset = partitionOffsets.putIfAbsent(partition, offset);
                                    if (previousOffset != null && previousOffset < offset) {
                                        succeeded = partitionOffsets.replace(partition, previousOffset, offset);
                                    }
                                } while (!succeeded);
                            }
                        } else {
                            // Not publishing: every partition of the task must map to the group derived from its
                            // first partition, otherwise the task does not fit the current allocation.
                            for (Integer partition : kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap().keySet()) {
                                if (!taskGroupId.equals(getTaskGroupIdForPartition(partition))) {
                                    log.warn("Stopping task [%s] which does not match the expected partition allocation", taskId);
                                    try {
                                        stopTask(taskId, false).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
                                    } catch (InterruptedException | ExecutionException | TimeoutException e) {
                                        // Failure to stop is only logged; the task is still reported as not registered.
                                        log.warn(e, "Exception while stopping task");
                                    }
                                    return false;
                                }
                            }
                            // Create the task group from this task's start offsets and minimum message time unless a
                            // group with this id already exists.
                            if (taskGroups.putIfAbsent(taskGroupId, new TaskGroup(ImmutableMap.copyOf(kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap()), kafkaTask.getIOConfig().getMinimumMessageTime())) == null) {
                                log.debug("Created new task group [%d]", taskGroupId);
                            }
                            if (!isTaskCurrent(taskGroupId, taskId)) {
                                log.info("Stopping task [%s] which does not match the expected parameters and ingestion spec", taskId);
                                try {
                                    stopTask(taskId, false).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
                                } catch (InterruptedException | ExecutionException | TimeoutException e) {
                                    // Failure to stop is only logged; the task is still reported as not registered.
                                    log.warn(e, "Exception while stopping task");
                                }
                                return false;
                            } else {
                                taskGroups.get(taskGroupId).tasks.putIfAbsent(taskId, new TaskData());
                            }
                        }
                        return true;
                    }
                }, workerExec));
            }
        }
    }
    // Wait for all status lookups. Only null entries are acted upon below (per Guava's successfulAsList contract a
    // null entry stands for a future that did not succeed); a false result means the task was already asked to stop.
    List<Boolean> results = Futures.successfulAsList(futures).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
    for (int i = 0; i < results.size(); i++) {
        if (results.get(i) == null) {
            String taskId = futureTaskIds.get(i);
            log.warn("Task [%s] failed to return status, killing task", taskId);
            killTask(taskId);
        }
    }
    log.debug("Found [%d] Kafka indexing tasks for dataSource [%s]", taskCount, dataSource);
}
Also used : Task(io.druid.indexing.common.task.Task) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) Function(com.google.common.base.Function) ExecutionException(java.util.concurrent.ExecutionException) TimeoutException(java.util.concurrent.TimeoutException) TaskStatus(io.druid.indexing.common.TaskStatus) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap)

Example 3 with KafkaIndexTask

use of io.druid.indexing.kafka.KafkaIndexTask in project druid by druid-io.

the class KafkaSupervisorTest method testDatasourceMetadata.

/**
 * Test generating the starting offsets from the partition data stored in druid_dataSource which contains the
 * offsets of the last built segments.
 * <p>
 * The datasource metadata is stubbed with offsets 10, 20 and 30 for partitions 0, 1 and 2; the task handed to
 * the task queue is expected to start at exactly those offsets and to use the base sequence name
 * {@code sequenceName-0}.
 */
@Test
public void testDatasourceMetadata() throws Exception {
    supervisor = getSupervisor(1, 1, true, "PT1H", null);
    addSomeEvents(100);
    Capture<KafkaIndexTask> captured = Capture.newInstance();
    expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    expect(taskMaster.getTaskRunner()).andReturn(Optional.<TaskRunner>absent()).anyTimes();
    expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.<Task>of()).anyTimes();
    expect(indexerMetadataStorageCoordinator.getDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(new KafkaPartitions(KAFKA_TOPIC, ImmutableMap.of(0, 10L, 1, 20L, 2, 30L)))).anyTimes();
    expect(taskQueue.add(capture(captured))).andReturn(true);
    replayAll();
    supervisor.start();
    supervisor.runInternal();
    verifyAll();
    KafkaIndexTask task = captured.getValue();
    KafkaIOConfig taskConfig = task.getIOConfig();
    // The expected name is a plain literal: it contains no format specifiers, so no formatting step is needed.
    Assert.assertEquals("sequenceName-0", taskConfig.getBaseSequenceName());
    Assert.assertEquals(10L, (long) taskConfig.getStartPartitions().getPartitionOffsetMap().get(0));
    Assert.assertEquals(20L, (long) taskConfig.getStartPartitions().getPartitionOffsetMap().get(1));
    Assert.assertEquals(30L, (long) taskConfig.getStartPartitions().getPartitionOffsetMap().get(2));
}
Also used : RealtimeIndexTask(io.druid.indexing.common.task.RealtimeIndexTask) Task(io.druid.indexing.common.task.Task) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) KafkaPartitions(io.druid.indexing.kafka.KafkaPartitions) KafkaIOConfig(io.druid.indexing.kafka.KafkaIOConfig) KafkaDataSourceMetadata(io.druid.indexing.kafka.KafkaDataSourceMetadata) TaskRunner(io.druid.indexing.overlord.TaskRunner) Test(org.junit.Test)

Example 4 with KafkaIndexTask

use of io.druid.indexing.kafka.KafkaIndexTask in project druid by druid-io.

the class KafkaSupervisorTest method testBeginPublishAndQueueNextTasks.

/**
 * Verifies that the supervisor signals a task group to finish and queues replacement tasks starting at the
 * offsets the finishing tasks were told to end at.
 * <p>
 * First run: with no active tasks, four tasks are expected to be queued. Second run: those tasks are reported
 * as active and READING; for the "sequenceName-0" tasks one start time is stubbed two minutes in the past, the
 * tasks are paused, and end offsets {0=10, 1=20, 2=35} (the larger value per partition of the two stubbed pause
 * responses) are expected to be set on them. Two new tasks are then expected to be queued, each starting at
 * those offsets.
 */
@Test
public void testBeginPublishAndQueueNextTasks() throws Exception {
    final TaskLocation taskLocation = new TaskLocation("testHost", 1234);
    supervisor = getSupervisor(2, 2, true, "PT1M", null);
    addSomeEvents(100);

    // ---- first run: nothing is active, so the supervisor creates the initial tasks ----
    Capture<Task> taskCapture = Capture.newInstance(CaptureType.ALL);
    expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    expect(taskRunner.getRunningTasks()).andReturn(Collections.EMPTY_LIST).anyTimes();
    expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.<Task>of()).anyTimes();
    expect(indexerMetadataStorageCoordinator.getDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
    expect(taskQueue.add(capture(taskCapture))).andReturn(true).times(4);
    taskRunner.registerListener(anyObject(TaskRunnerListener.class), anyObject(Executor.class));
    replayAll();

    supervisor.start();
    supervisor.runInternal();
    verifyAll();

    // Present every task queued in the first run as a running work item at the test location.
    List<Task> initialTasks = taskCapture.getValues();
    Collection runningWorkItems = new ArrayList<>();
    for (Task initialTask : initialTasks) {
        runningWorkItems.add(new TestTaskRunnerWorkItem(initialTask.getId(), null, taskLocation));
    }

    // ---- second run: the initial tasks are active and reading ----
    reset(taskStorage, taskRunner, taskClient, taskQueue);
    taskCapture = Capture.newInstance(CaptureType.ALL);
    expect(taskStorage.getActiveTasks()).andReturn(initialTasks).anyTimes();
    for (Task initialTask : initialTasks) {
        final String id = initialTask.getId();
        expect(taskStorage.getStatus(id)).andReturn(Optional.of(TaskStatus.running(id))).anyTimes();
        expect(taskStorage.getTask(id)).andReturn(Optional.of(initialTask)).anyTimes();
    }
    expect(taskRunner.getRunningTasks()).andReturn(runningWorkItems).anyTimes();
    expect(taskClient.getStatusAsync(anyString())).andReturn(Futures.immediateFuture(KafkaIndexTask.Status.READING)).anyTimes();
    expect(taskClient.getStartTimeAsync(EasyMock.contains("sequenceName-0"))).andReturn(Futures.immediateFuture(DateTime.now().minusMinutes(2))).andReturn(Futures.immediateFuture(DateTime.now()));
    expect(taskClient.getStartTimeAsync(EasyMock.contains("sequenceName-1"))).andReturn(Futures.immediateFuture(DateTime.now())).times(2);

    // Offsets reported by the two paused tasks, and the end offsets expected to be sent back to them.
    final Map<Integer, Long> firstPausedOffsets = ImmutableMap.of(0, 10L, 1, 20L, 2, 30L);
    final Map<Integer, Long> secondPausedOffsets = ImmutableMap.of(0, 10L, 1, 15L, 2, 35L);
    final Map<Integer, Long> expectedEndOffsets = ImmutableMap.of(0, 10L, 1, 20L, 2, 35L);
    expect(taskClient.pauseAsync(EasyMock.contains("sequenceName-0"))).andReturn(Futures.immediateFuture(firstPausedOffsets)).andReturn(Futures.immediateFuture(secondPausedOffsets));
    expect(taskClient.setEndOffsetsAsync(EasyMock.contains("sequenceName-0"), EasyMock.eq(expectedEndOffsets), EasyMock.eq(true))).andReturn(Futures.immediateFuture(true)).times(2);
    expect(taskQueue.add(capture(taskCapture))).andReturn(true).times(2);
    replay(taskStorage, taskRunner, taskClient, taskQueue);

    supervisor.runInternal();
    verifyAll();

    // Each replacement task must carry the supervisor's spec and resume from the end offsets set above.
    for (Task queued : taskCapture.getValues()) {
        KafkaIndexTask replacement = (KafkaIndexTask) queued;
        Assert.assertEquals(dataSchema, replacement.getDataSchema());
        Assert.assertEquals(KafkaTuningConfig.copyOf(tuningConfig), replacement.getTuningConfig());

        KafkaIOConfig ioConfig = replacement.getIOConfig();
        Assert.assertEquals("sequenceName-0", ioConfig.getBaseSequenceName());
        Assert.assertTrue("isUseTransaction", ioConfig.isUseTransaction());
        Assert.assertFalse("pauseAfterRead", ioConfig.isPauseAfterRead());
        Assert.assertEquals(KAFKA_TOPIC, ioConfig.getStartPartitions().getTopic());

        Map<Integer, Long> startOffsets = ioConfig.getStartPartitions().getPartitionOffsetMap();
        Assert.assertEquals(10L, startOffsets.get(0).longValue());
        Assert.assertEquals(20L, startOffsets.get(1).longValue());
        Assert.assertEquals(35L, startOffsets.get(2).longValue());
    }
}
Also used : TaskRunnerListener(io.druid.indexing.overlord.TaskRunnerListener) RealtimeIndexTask(io.druid.indexing.common.task.RealtimeIndexTask) Task(io.druid.indexing.common.task.Task) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) ArrayList(java.util.ArrayList) TaskLocation(io.druid.indexing.common.TaskLocation) Executor(java.util.concurrent.Executor) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) KafkaIOConfig(io.druid.indexing.kafka.KafkaIOConfig) Collection(java.util.Collection) KafkaDataSourceMetadata(io.druid.indexing.kafka.KafkaDataSourceMetadata) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)

Example 5 with KafkaIndexTask

use of io.druid.indexing.kafka.KafkaIndexTask in project druid by druid-io.

the class KafkaSupervisorTest method testMultiTask.

/**
 * Verifies how partitions are distributed when the supervisor creates two tasks.
 * <p>
 * Two tasks are expected to be queued: the first reads partitions 0 and 2, the second reads partition 1. Every
 * partition is expected to start at offset 0 and to have {@link Long#MAX_VALUE} as its end offset.
 */
@Test
public void testMultiTask() throws Exception {
    supervisor = getSupervisor(1, 2, true, "PT1H", null);
    addSomeEvents(1);

    // Record expectations: nothing is running yet and no offsets have been persisted.
    Capture<KafkaIndexTask> taskCapture = Capture.newInstance(CaptureType.ALL);
    expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    expect(taskMaster.getTaskRunner()).andReturn(Optional.<TaskRunner>absent()).anyTimes();
    expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.<Task>of()).anyTimes();
    expect(indexerMetadataStorageCoordinator.getDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
    expect(taskQueue.add(capture(taskCapture))).andReturn(true).times(2);
    replayAll();

    supervisor.start();
    supervisor.runInternal();
    verifyAll();

    List<KafkaIndexTask> queuedTasks = taskCapture.getValues();

    // First task: partitions 0 and 2.
    KafkaIndexTask firstTask = queuedTasks.get(0);
    Map<Integer, Long> firstStart = firstTask.getIOConfig().getStartPartitions().getPartitionOffsetMap();
    Map<Integer, Long> firstEnd = firstTask.getIOConfig().getEndPartitions().getPartitionOffsetMap();
    Assert.assertEquals(2, firstStart.size());
    Assert.assertEquals(2, firstEnd.size());
    Assert.assertEquals(0L, firstStart.get(0).longValue());
    Assert.assertEquals(Long.MAX_VALUE, firstEnd.get(0).longValue());
    Assert.assertEquals(0L, firstStart.get(2).longValue());
    Assert.assertEquals(Long.MAX_VALUE, firstEnd.get(2).longValue());

    // Second task: partition 1 only.
    KafkaIndexTask secondTask = queuedTasks.get(1);
    Map<Integer, Long> secondStart = secondTask.getIOConfig().getStartPartitions().getPartitionOffsetMap();
    Map<Integer, Long> secondEnd = secondTask.getIOConfig().getEndPartitions().getPartitionOffsetMap();
    Assert.assertEquals(1, secondStart.size());
    Assert.assertEquals(1, secondEnd.size());
    Assert.assertEquals(0L, secondStart.get(1).longValue());
    Assert.assertEquals(Long.MAX_VALUE, secondEnd.get(1).longValue());
}
Also used : RealtimeIndexTask(io.druid.indexing.common.task.RealtimeIndexTask) Task(io.druid.indexing.common.task.Task) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) KafkaDataSourceMetadata(io.druid.indexing.kafka.KafkaDataSourceMetadata) TaskRunner(io.druid.indexing.overlord.TaskRunner) Test(org.junit.Test)

Aggregations

KafkaIndexTask (io.druid.indexing.kafka.KafkaIndexTask)17 Task (io.druid.indexing.common.task.Task)16 RealtimeIndexTask (io.druid.indexing.common.task.RealtimeIndexTask)15 KafkaDataSourceMetadata (io.druid.indexing.kafka.KafkaDataSourceMetadata)15 Test (org.junit.Test)15 TaskRunnerListener (io.druid.indexing.overlord.TaskRunnerListener)10 Executor (java.util.concurrent.Executor)10 KafkaIOConfig (io.druid.indexing.kafka.KafkaIOConfig)8 ImmutableMap (com.google.common.collect.ImmutableMap)6 KafkaPartitions (io.druid.indexing.kafka.KafkaPartitions)6 Map (java.util.Map)6 TaskLocation (io.druid.indexing.common.TaskLocation)5 TaskRunner (io.druid.indexing.overlord.TaskRunner)5 ArrayList (java.util.ArrayList)5 Collection (java.util.Collection)5 SupervisorReport (io.druid.indexing.overlord.supervisor.SupervisorReport)2 HashMap (java.util.HashMap)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 EasyMock.anyString (org.easymock.EasyMock.anyString)2 DateTime (org.joda.time.DateTime)2