Search in sources :

Example 1 with Task

use of io.druid.indexing.common.task.Task in project druid by druid-io.

the class KafkaSupervisor method discoverTasks.

private void discoverTasks() throws ExecutionException, InterruptedException, TimeoutException {
    int taskCount = 0;
    List<String> futureTaskIds = Lists.newArrayList();
    List<ListenableFuture<Boolean>> futures = Lists.newArrayList();
    List<Task> tasks = taskStorage.getActiveTasks();
    for (Task task : tasks) {
        if (!(task instanceof KafkaIndexTask) || !dataSource.equals(task.getDataSource())) {
            continue;
        }
        taskCount++;
        final KafkaIndexTask kafkaTask = (KafkaIndexTask) task;
        final String taskId = task.getId();
        // Determine which task group this task belongs to based on one of the partitions handled by this task. If we
        // later determine that this task is actively reading, we will make sure that it matches our current partition
        // allocation (getTaskGroupIdForPartition(partition) should return the same value for every partition being read
        // by this task) and kill it if it is not compatible. If the task is instead found to be in the publishing
        // state, we will permit it to complete even if it doesn't match our current partition allocation to support
        // seamless schema migration.
        Iterator<Integer> it = kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap().keySet().iterator();
        final Integer taskGroupId = (it.hasNext() ? getTaskGroupIdForPartition(it.next()) : null);
        if (taskGroupId != null) {
            // check to see if we already know about this task, either in [taskGroups] or in [pendingCompletionTaskGroups]
            // and if not add it to taskGroups or pendingCompletionTaskGroups (if status = PUBLISHING)
            TaskGroup taskGroup = taskGroups.get(taskGroupId);
            if (!isTaskInPendingCompletionGroups(taskId) && (taskGroup == null || !taskGroup.tasks.containsKey(taskId))) {
                futureTaskIds.add(taskId);
                futures.add(Futures.transform(taskClient.getStatusAsync(taskId), new Function<KafkaIndexTask.Status, Boolean>() {

                    @Override
                    public Boolean apply(KafkaIndexTask.Status status) {
                        if (status == KafkaIndexTask.Status.PUBLISHING) {
                            addDiscoveredTaskToPendingCompletionTaskGroups(taskGroupId, taskId, kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap());
                            // update partitionGroups with the publishing task's offsets (if they are greater than what is
                            // existing) so that the next tasks will start reading from where this task left off
                            Map<Integer, Long> publishingTaskCurrentOffsets = taskClient.getCurrentOffsets(taskId, true);
                            for (Map.Entry<Integer, Long> entry : publishingTaskCurrentOffsets.entrySet()) {
                                Integer partition = entry.getKey();
                                Long offset = entry.getValue();
                                ConcurrentHashMap<Integer, Long> partitionOffsets = partitionGroups.get(getTaskGroupIdForPartition(partition));
                                boolean succeeded;
                                do {
                                    succeeded = true;
                                    Long previousOffset = partitionOffsets.putIfAbsent(partition, offset);
                                    if (previousOffset != null && previousOffset < offset) {
                                        succeeded = partitionOffsets.replace(partition, previousOffset, offset);
                                    }
                                } while (!succeeded);
                            }
                        } else {
                            for (Integer partition : kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap().keySet()) {
                                if (!taskGroupId.equals(getTaskGroupIdForPartition(partition))) {
                                    log.warn("Stopping task [%s] which does not match the expected partition allocation", taskId);
                                    try {
                                        stopTask(taskId, false).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
                                    } catch (InterruptedException | ExecutionException | TimeoutException e) {
                                        log.warn(e, "Exception while stopping task");
                                    }
                                    return false;
                                }
                            }
                            if (taskGroups.putIfAbsent(taskGroupId, new TaskGroup(ImmutableMap.copyOf(kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap()), kafkaTask.getIOConfig().getMinimumMessageTime())) == null) {
                                log.debug("Created new task group [%d]", taskGroupId);
                            }
                            if (!isTaskCurrent(taskGroupId, taskId)) {
                                log.info("Stopping task [%s] which does not match the expected parameters and ingestion spec", taskId);
                                try {
                                    stopTask(taskId, false).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
                                } catch (InterruptedException | ExecutionException | TimeoutException e) {
                                    log.warn(e, "Exception while stopping task");
                                }
                                return false;
                            } else {
                                taskGroups.get(taskGroupId).tasks.putIfAbsent(taskId, new TaskData());
                            }
                        }
                        return true;
                    }
                }, workerExec));
            }
        }
    }
    List<Boolean> results = Futures.successfulAsList(futures).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
    for (int i = 0; i < results.size(); i++) {
        if (results.get(i) == null) {
            String taskId = futureTaskIds.get(i);
            log.warn("Task [%s] failed to return status, killing task", taskId);
            killTask(taskId);
        }
    }
    log.debug("Found [%d] Kafka indexing tasks for dataSource [%s]", taskCount, dataSource);
}
Also used : Task(io.druid.indexing.common.task.Task) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) Function(com.google.common.base.Function) ExecutionException(java.util.concurrent.ExecutionException) TimeoutException(java.util.concurrent.TimeoutException) TaskStatus(io.druid.indexing.common.TaskStatus) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap)

Example 2 with Task

use of io.druid.indexing.common.task.Task in project druid by druid-io.

the class KafkaSupervisorTest method testDatasourceMetadata.

@Test
public /**
   * Test generating the starting offsets from the partition data stored in druid_dataSource which contains the
   * offsets of the last built segments.
   */
void testDatasourceMetadata() throws Exception {
    supervisor = getSupervisor(1, 1, true, "PT1H", null);
    addSomeEvents(100);
    Capture<KafkaIndexTask> captured = Capture.newInstance();
    expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    expect(taskMaster.getTaskRunner()).andReturn(Optional.<TaskRunner>absent()).anyTimes();
    expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.<Task>of()).anyTimes();
    expect(indexerMetadataStorageCoordinator.getDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(new KafkaPartitions(KAFKA_TOPIC, ImmutableMap.of(0, 10L, 1, 20L, 2, 30L)))).anyTimes();
    expect(taskQueue.add(capture(captured))).andReturn(true);
    replayAll();
    supervisor.start();
    supervisor.runInternal();
    verifyAll();
    KafkaIndexTask task = captured.getValue();
    KafkaIOConfig taskConfig = task.getIOConfig();
    Assert.assertEquals(String.format("sequenceName-0", DATASOURCE), taskConfig.getBaseSequenceName());
    Assert.assertEquals(10L, (long) taskConfig.getStartPartitions().getPartitionOffsetMap().get(0));
    Assert.assertEquals(20L, (long) taskConfig.getStartPartitions().getPartitionOffsetMap().get(1));
    Assert.assertEquals(30L, (long) taskConfig.getStartPartitions().getPartitionOffsetMap().get(2));
}
Also used : RealtimeIndexTask(io.druid.indexing.common.task.RealtimeIndexTask) Task(io.druid.indexing.common.task.Task) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) KafkaPartitions(io.druid.indexing.kafka.KafkaPartitions) KafkaIOConfig(io.druid.indexing.kafka.KafkaIOConfig) KafkaDataSourceMetadata(io.druid.indexing.kafka.KafkaDataSourceMetadata) TaskRunner(io.druid.indexing.overlord.TaskRunner) Test(org.junit.Test)

Example 3 with Task

use of io.druid.indexing.common.task.Task in project druid by druid-io.

the class KafkaSupervisorTest method testBeginPublishAndQueueNextTasks.

@Test
public void testBeginPublishAndQueueNextTasks() throws Exception {
    final TaskLocation location = new TaskLocation("testHost", 1234);
    supervisor = getSupervisor(2, 2, true, "PT1M", null);
    addSomeEvents(100);
    Capture<Task> captured = Capture.newInstance(CaptureType.ALL);
    expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    expect(taskRunner.getRunningTasks()).andReturn(Collections.EMPTY_LIST).anyTimes();
    expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.<Task>of()).anyTimes();
    expect(indexerMetadataStorageCoordinator.getDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
    expect(taskQueue.add(capture(captured))).andReturn(true).times(4);
    taskRunner.registerListener(anyObject(TaskRunnerListener.class), anyObject(Executor.class));
    replayAll();
    supervisor.start();
    supervisor.runInternal();
    verifyAll();
    List<Task> tasks = captured.getValues();
    Collection workItems = new ArrayList<>();
    for (Task task : tasks) {
        workItems.add(new TestTaskRunnerWorkItem(task.getId(), null, location));
    }
    reset(taskStorage, taskRunner, taskClient, taskQueue);
    captured = Capture.newInstance(CaptureType.ALL);
    expect(taskStorage.getActiveTasks()).andReturn(tasks).anyTimes();
    for (Task task : tasks) {
        expect(taskStorage.getStatus(task.getId())).andReturn(Optional.of(TaskStatus.running(task.getId()))).anyTimes();
        expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
    }
    expect(taskRunner.getRunningTasks()).andReturn(workItems).anyTimes();
    expect(taskClient.getStatusAsync(anyString())).andReturn(Futures.immediateFuture(KafkaIndexTask.Status.READING)).anyTimes();
    expect(taskClient.getStartTimeAsync(EasyMock.contains("sequenceName-0"))).andReturn(Futures.immediateFuture(DateTime.now().minusMinutes(2))).andReturn(Futures.immediateFuture(DateTime.now()));
    expect(taskClient.getStartTimeAsync(EasyMock.contains("sequenceName-1"))).andReturn(Futures.immediateFuture(DateTime.now())).times(2);
    expect(taskClient.pauseAsync(EasyMock.contains("sequenceName-0"))).andReturn(Futures.immediateFuture((Map<Integer, Long>) ImmutableMap.of(0, 10L, 1, 20L, 2, 30L))).andReturn(Futures.immediateFuture((Map<Integer, Long>) ImmutableMap.of(0, 10L, 1, 15L, 2, 35L)));
    expect(taskClient.setEndOffsetsAsync(EasyMock.contains("sequenceName-0"), EasyMock.eq(ImmutableMap.of(0, 10L, 1, 20L, 2, 35L)), EasyMock.eq(true))).andReturn(Futures.immediateFuture(true)).times(2);
    expect(taskQueue.add(capture(captured))).andReturn(true).times(2);
    replay(taskStorage, taskRunner, taskClient, taskQueue);
    supervisor.runInternal();
    verifyAll();
    for (Task task : captured.getValues()) {
        KafkaIndexTask kafkaIndexTask = (KafkaIndexTask) task;
        Assert.assertEquals(dataSchema, kafkaIndexTask.getDataSchema());
        Assert.assertEquals(KafkaTuningConfig.copyOf(tuningConfig), kafkaIndexTask.getTuningConfig());
        KafkaIOConfig taskConfig = kafkaIndexTask.getIOConfig();
        Assert.assertEquals("sequenceName-0", taskConfig.getBaseSequenceName());
        Assert.assertTrue("isUseTransaction", taskConfig.isUseTransaction());
        Assert.assertFalse("pauseAfterRead", taskConfig.isPauseAfterRead());
        Assert.assertEquals(KAFKA_TOPIC, taskConfig.getStartPartitions().getTopic());
        Assert.assertEquals(10L, (long) taskConfig.getStartPartitions().getPartitionOffsetMap().get(0));
        Assert.assertEquals(20L, (long) taskConfig.getStartPartitions().getPartitionOffsetMap().get(1));
        Assert.assertEquals(35L, (long) taskConfig.getStartPartitions().getPartitionOffsetMap().get(2));
    }
}
Also used : TaskRunnerListener(io.druid.indexing.overlord.TaskRunnerListener) RealtimeIndexTask(io.druid.indexing.common.task.RealtimeIndexTask) Task(io.druid.indexing.common.task.Task) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) ArrayList(java.util.ArrayList) TaskLocation(io.druid.indexing.common.TaskLocation) Executor(java.util.concurrent.Executor) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) KafkaIOConfig(io.druid.indexing.kafka.KafkaIOConfig) Collection(java.util.Collection) KafkaDataSourceMetadata(io.druid.indexing.kafka.KafkaDataSourceMetadata) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)

Example 4 with Task

use of io.druid.indexing.common.task.Task in project druid by druid-io.

the class KafkaSupervisorTest method testMultiTask.

@Test
public void testMultiTask() throws Exception {
    supervisor = getSupervisor(1, 2, true, "PT1H", null);
    addSomeEvents(1);
    Capture<KafkaIndexTask> captured = Capture.newInstance(CaptureType.ALL);
    expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    expect(taskMaster.getTaskRunner()).andReturn(Optional.<TaskRunner>absent()).anyTimes();
    expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.<Task>of()).anyTimes();
    expect(indexerMetadataStorageCoordinator.getDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
    expect(taskQueue.add(capture(captured))).andReturn(true).times(2);
    replayAll();
    supervisor.start();
    supervisor.runInternal();
    verifyAll();
    KafkaIndexTask task1 = captured.getValues().get(0);
    Assert.assertEquals(2, task1.getIOConfig().getStartPartitions().getPartitionOffsetMap().size());
    Assert.assertEquals(2, task1.getIOConfig().getEndPartitions().getPartitionOffsetMap().size());
    Assert.assertEquals(0L, (long) task1.getIOConfig().getStartPartitions().getPartitionOffsetMap().get(0));
    Assert.assertEquals(Long.MAX_VALUE, (long) task1.getIOConfig().getEndPartitions().getPartitionOffsetMap().get(0));
    Assert.assertEquals(0L, (long) task1.getIOConfig().getStartPartitions().getPartitionOffsetMap().get(2));
    Assert.assertEquals(Long.MAX_VALUE, (long) task1.getIOConfig().getEndPartitions().getPartitionOffsetMap().get(2));
    KafkaIndexTask task2 = captured.getValues().get(1);
    Assert.assertEquals(1, task2.getIOConfig().getStartPartitions().getPartitionOffsetMap().size());
    Assert.assertEquals(1, task2.getIOConfig().getEndPartitions().getPartitionOffsetMap().size());
    Assert.assertEquals(0L, (long) task2.getIOConfig().getStartPartitions().getPartitionOffsetMap().get(1));
    Assert.assertEquals(Long.MAX_VALUE, (long) task2.getIOConfig().getEndPartitions().getPartitionOffsetMap().get(1));
}
Also used : RealtimeIndexTask(io.druid.indexing.common.task.RealtimeIndexTask) Task(io.druid.indexing.common.task.Task) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) KafkaDataSourceMetadata(io.druid.indexing.kafka.KafkaDataSourceMetadata) TaskRunner(io.druid.indexing.overlord.TaskRunner) Test(org.junit.Test)

Example 5 with Task

use of io.druid.indexing.common.task.Task in project druid by druid-io.

the class KafkaSupervisorTest method testRequeueAdoptedTaskWhenFailed.

@Test
public void testRequeueAdoptedTaskWhenFailed() throws Exception {
    supervisor = getSupervisor(2, 1, true, "PT1H", null);
    addSomeEvents(1);
    DateTime now = DateTime.now();
    Task id1 = createKafkaIndexTask("id1", DATASOURCE, "sequenceName-0", new KafkaPartitions("topic", ImmutableMap.of(0, 0L, 2, 0L)), new KafkaPartitions("topic", ImmutableMap.of(0, Long.MAX_VALUE, 2, Long.MAX_VALUE)), now);
    List<Task> existingTasks = ImmutableList.of(id1);
    Capture<Task> captured = Capture.newInstance();
    expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    expect(taskRunner.getRunningTasks()).andReturn(Collections.EMPTY_LIST).anyTimes();
    expect(taskStorage.getActiveTasks()).andReturn(existingTasks).anyTimes();
    expect(taskStorage.getStatus("id1")).andReturn(Optional.of(TaskStatus.running("id1"))).anyTimes();
    expect(taskStorage.getTask("id1")).andReturn(Optional.of(id1)).anyTimes();
    expect(taskClient.getStatusAsync("id1")).andReturn(Futures.immediateFuture(KafkaIndexTask.Status.READING));
    expect(taskClient.getStartTimeAsync("id1")).andReturn(Futures.immediateFuture(now)).anyTimes();
    expect(taskQueue.add(capture(captured))).andReturn(true);
    expect(indexerMetadataStorageCoordinator.getDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
    taskRunner.registerListener(anyObject(TaskRunnerListener.class), anyObject(Executor.class));
    replayAll();
    supervisor.start();
    supervisor.runInternal();
    verifyAll();
    // check that replica tasks are created with the same minimumMessageTime as tasks inherited from another supervisor
    Assert.assertEquals(now, ((KafkaIndexTask) captured.getValue()).getIOConfig().getMinimumMessageTime().get());
    // test that a task failing causes a new task to be re-queued with the same parameters
    String runningTaskId = captured.getValue().getId();
    Capture<Task> aNewTaskCapture = Capture.newInstance();
    KafkaIndexTask iHaveFailed = (KafkaIndexTask) existingTasks.get(0);
    reset(taskStorage);
    reset(taskQueue);
    reset(taskClient);
    expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.of(captured.getValue())).anyTimes();
    expect(taskStorage.getStatus(iHaveFailed.getId())).andReturn(Optional.of(TaskStatus.failure(iHaveFailed.getId())));
    expect(taskStorage.getStatus(runningTaskId)).andReturn(Optional.of(TaskStatus.running(runningTaskId))).anyTimes();
    expect(taskStorage.getTask(iHaveFailed.getId())).andReturn(Optional.of((Task) iHaveFailed)).anyTimes();
    expect(taskStorage.getTask(runningTaskId)).andReturn(Optional.of(captured.getValue())).anyTimes();
    expect(taskClient.getStatusAsync(runningTaskId)).andReturn(Futures.immediateFuture(KafkaIndexTask.Status.READING));
    expect(taskClient.getStartTimeAsync(runningTaskId)).andReturn(Futures.immediateFuture(now)).anyTimes();
    expect(taskQueue.add(capture(aNewTaskCapture))).andReturn(true);
    replay(taskStorage);
    replay(taskQueue);
    replay(taskClient);
    supervisor.runInternal();
    verifyAll();
    Assert.assertNotEquals(iHaveFailed.getId(), aNewTaskCapture.getValue().getId());
    Assert.assertEquals(iHaveFailed.getIOConfig().getBaseSequenceName(), ((KafkaIndexTask) aNewTaskCapture.getValue()).getIOConfig().getBaseSequenceName());
    // check that failed tasks are recreated with the same minimumMessageTime as the task it replaced, even if that
    // task came from another supervisor
    Assert.assertEquals(now, ((KafkaIndexTask) aNewTaskCapture.getValue()).getIOConfig().getMinimumMessageTime().get());
}
Also used : TaskRunnerListener(io.druid.indexing.overlord.TaskRunnerListener) RealtimeIndexTask(io.druid.indexing.common.task.RealtimeIndexTask) Task(io.druid.indexing.common.task.Task) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) Executor(java.util.concurrent.Executor) KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask) KafkaPartitions(io.druid.indexing.kafka.KafkaPartitions) KafkaDataSourceMetadata(io.druid.indexing.kafka.KafkaDataSourceMetadata) EasyMock.anyString(org.easymock.EasyMock.anyString) DateTime(org.joda.time.DateTime) Test(org.junit.Test)

Aggregations

Task (io.druid.indexing.common.task.Task)80 Test (org.junit.Test)62 RealtimeIndexTask (io.druid.indexing.common.task.RealtimeIndexTask)35 NoopTask (io.druid.indexing.common.task.NoopTask)28 KafkaIndexTask (io.druid.indexing.kafka.KafkaIndexTask)24 KafkaDataSourceMetadata (io.druid.indexing.kafka.KafkaDataSourceMetadata)23 Interval (org.joda.time.Interval)18 TaskStatus (io.druid.indexing.common.TaskStatus)17 TaskRunnerListener (io.druid.indexing.overlord.TaskRunnerListener)17 Executor (java.util.concurrent.Executor)17 KafkaPartitions (io.druid.indexing.kafka.KafkaPartitions)12 AbstractFixedIntervalTask (io.druid.indexing.common.task.AbstractFixedIntervalTask)11 IndexTask (io.druid.indexing.common.task.IndexTask)11 KillTask (io.druid.indexing.common.task.KillTask)11 FireDepartmentTest (io.druid.segment.realtime.FireDepartmentTest)10 SegmentIdentifier (io.druid.segment.realtime.appenderator.SegmentIdentifier)10 Map (java.util.Map)10 DateTime (org.joda.time.DateTime)10 ImmutableMap (com.google.common.collect.ImmutableMap)9 Collection (java.util.Collection)9