Search in sources :

Example 11 with KafkaDataSourceMetadata

use of org.apache.druid.indexing.kafka.KafkaDataSourceMetadata in project druid by druid-io.

the class KafkaSupervisorTest method testBeginPublishAndQueueNextTasks.

/**
 * Two-phase supervisor test.
 *
 * Phase 1: with no active or running tasks reported by the mocks, one supervisor run is expected to add
 * four tasks to the task queue; all of them are captured.
 *
 * Phase 2: the captured tasks are reported back as active/running and READING. Tasks whose id contains
 * "sequenceName-0" report a start time two minutes in the past on the first call, are expected to be paused
 * and to have their end offsets set, and two further tasks are expected to be queued. Those new tasks are
 * then checked to start from offsets 10 (partition 0) and 35 (partition 2).
 */
@Test
public void testBeginPublishAndQueueNextTasks() throws Exception {
    final TaskLocation location = new TaskLocation("testHost", 1234, -1);
    // NOTE(review): "PT1M" is presumably the task duration, which would make the "2 minutes ago" start time
    // below exceed it — confirm against getTestableSupervisor.
    supervisor = getTestableSupervisor(2, 2, true, "PT1M", null, null);
    final KafkaSupervisorTuningConfig tuningConfig = supervisor.getTuningConfig();
    addSomeEvents(100);
    // CaptureType.ALL keeps every task passed to taskQueue.add, not just the last one.
    Capture<Task> captured = Capture.newInstance(CaptureType.ALL);
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
    // Phase 1 expectation: exactly four tasks are queued.
    EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true).times(4);
    taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
    replayAll();
    supervisor.start();
    supervisor.runInternal();
    verifyAll();
    List<Task> tasks = captured.getValues();
    // Raw Collection — presumably needed so it can be handed to andReturn(...) for getRunningTasks() below;
    // verify before adding type arguments.
    Collection workItems = new ArrayList<>();
    for (Task task : tasks) {
        workItems.add(new TestTaskRunnerWorkItem(task, null, location));
    }
    // Phase 2: re-record expectations on these four mocks only; the other mocks are not reset here.
    EasyMock.reset(taskStorage, taskRunner, taskClient, taskQueue);
    captured = Capture.newInstance(CaptureType.ALL);
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(tasks).anyTimes();
    for (Task task : tasks) {
        EasyMock.expect(taskStorage.getStatus(task.getId())).andReturn(Optional.of(TaskStatus.running(task.getId()))).anyTimes();
        EasyMock.expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
    }
    EasyMock.expect(taskRunner.getRunningTasks()).andReturn(workItems).anyTimes();
    EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(Status.READING)).anyTimes();
    // For ids containing "sequenceName-0": first call reports a start two minutes ago, second call reports "now".
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.contains("sequenceName-0"))).andReturn(Futures.immediateFuture(DateTimes.nowUtc().minusMinutes(2))).andReturn(Futures.immediateFuture(DateTimes.nowUtc()));
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.contains("sequenceName-1"))).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).times(2);
    // The two pause calls return different offsets for partition 2 (30 and 35).
    EasyMock.expect(taskClient.pauseAsync(EasyMock.contains("sequenceName-0"))).andReturn(Futures.immediateFuture(ImmutableMap.of(0, 10L, 2, 30L))).andReturn(Futures.immediateFuture(ImmutableMap.of(0, 10L, 2, 35L)));
    // Both "sequenceName-0" tasks must be given the same end offsets, using the higher value (35) for partition 2.
    EasyMock.expect(taskClient.setEndOffsetsAsync(EasyMock.contains("sequenceName-0"), EasyMock.eq(ImmutableMap.of(0, 10L, 2, 35L)), EasyMock.eq(true))).andReturn(Futures.immediateFuture(true)).times(2);
    // Phase 2 expectation: exactly two more tasks are queued.
    EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true).times(2);
    // Checkpoints returned per sequence name: partitions 0 and 2 for "sequenceName-0", partition 1 for "sequenceName-1".
    TreeMap<Integer, Map<Integer, Long>> checkpoints1 = new TreeMap<>();
    checkpoints1.put(0, ImmutableMap.of(0, 0L, 2, 0L));
    TreeMap<Integer, Map<Integer, Long>> checkpoints2 = new TreeMap<>();
    checkpoints2.put(0, ImmutableMap.of(1, 0L));
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-0"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints1)).times(2);
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-1"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints2)).times(2);
    EasyMock.replay(taskStorage, taskRunner, taskClient, taskQueue);
    supervisor.runInternal();
    verifyAll();
    // Every task queued in phase 2 must continue "sequenceName-0" from the end offsets set above.
    for (Task task : captured.getValues()) {
        KafkaIndexTask kafkaIndexTask = (KafkaIndexTask) task;
        Assert.assertEquals(dataSchema, kafkaIndexTask.getDataSchema());
        Assert.assertEquals(tuningConfig.convertToTaskTuningConfig(), kafkaIndexTask.getTuningConfig());
        KafkaIndexTaskIOConfig taskConfig = kafkaIndexTask.getIOConfig();
        Assert.assertEquals("sequenceName-0", taskConfig.getBaseSequenceName());
        Assert.assertTrue("isUseTransaction", taskConfig.isUseTransaction());
        Assert.assertEquals(topic, taskConfig.getStartSequenceNumbers().getStream());
        Assert.assertEquals(10L, (long) taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(0));
        Assert.assertEquals(35L, (long) taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(2));
    }
}
Also used : TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) KafkaIndexTask(org.apache.druid.indexing.kafka.KafkaIndexTask) Task(org.apache.druid.indexing.common.task.Task) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) ArrayList(java.util.ArrayList) TreeMap(java.util.TreeMap) TaskLocation(org.apache.druid.indexer.TaskLocation) KafkaIndexTaskIOConfig(org.apache.druid.indexing.kafka.KafkaIndexTaskIOConfig) Executor(java.util.concurrent.Executor) KafkaIndexTask(org.apache.druid.indexing.kafka.KafkaIndexTask) Collection(java.util.Collection) KafkaDataSourceMetadata(org.apache.druid.indexing.kafka.KafkaDataSourceMetadata) Map(java.util.Map) TreeMap(java.util.TreeMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 12 with KafkaDataSourceMetadata

use of org.apache.druid.indexing.kafka.KafkaDataSourceMetadata in project druid by druid-io.

the class KafkaSupervisorTest method testEarlyMessageRejectionPeriod.

/**
 * Runs the supervisor once with no pre-existing tasks and checks the maximumMessageTime of the two tasks
 * it queues: the value must lie between 119 and 121 minutes from now, and both tasks must carry the same value.
 */
@Test
public void testEarlyMessageRejectionPeriod() throws Exception {
    supervisor = getTestableSupervisor(2, 1, true, "PT1H", null, new Period("PT1H"));
    addSomeEvents(1);

    final Capture<KafkaIndexTask> queuedTasks = Capture.newInstance(CaptureType.ALL);

    EasyMock.expect(taskMaster.getTaskQueue())
            .andReturn(Optional.of(taskQueue))
            .anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner())
            .andReturn(Optional.absent())
            .anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE))
            .andReturn(ImmutableList.of())
            .anyTimes();
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE))
            .andReturn(new KafkaDataSourceMetadata(null))
            .anyTimes();
    // Exactly two tasks are expected to be queued by the single run below.
    EasyMock.expect(taskQueue.add(EasyMock.capture(queuedTasks)))
            .andReturn(true)
            .times(2);

    replayAll();
    supervisor.start();
    supervisor.runInternal();
    verifyAll();

    final KafkaIndexTask firstTask = queuedTasks.getValues().get(0);
    final KafkaIndexTask secondTask = queuedTasks.getValues().get(1);

    // Expected distance of maximumMessageTime from "now", in minutes; presumably the sum of the two
    // one-hour periods passed to getTestableSupervisor — TODO confirm. Checked with a one-minute tolerance.
    final int windowMinutes = 60 + 60;
    Assert.assertTrue(
        "maximumMessageTime",
        firstTask.getIOConfig().getMaximumMessageTime().get().minusMinutes(windowMinutes - 1).isAfterNow()
    );
    Assert.assertTrue(
        "maximumMessageTime",
        firstTask.getIOConfig().getMaximumMessageTime().get().minusMinutes(windowMinutes + 1).isBeforeNow()
    );

    // Both queued tasks must share the identical cutoff.
    Assert.assertEquals(
        firstTask.getIOConfig().getMaximumMessageTime().get(),
        secondTask.getIOConfig().getMaximumMessageTime().get()
    );
}
Also used : KafkaIndexTask(org.apache.druid.indexing.kafka.KafkaIndexTask) Period(org.joda.time.Period) KafkaDataSourceMetadata(org.apache.druid.indexing.kafka.KafkaDataSourceMetadata) Test(org.junit.Test)

Example 13 with KafkaDataSourceMetadata

use of org.apache.druid.indexing.kafka.KafkaDataSourceMetadata in project druid by druid-io.

the class KafkaSupervisorTest method testMultiTask.

/**
 * Runs the supervisor once with no pre-existing tasks and checks how partitions are split across the two
 * tasks it queues: the first task must cover partitions 0 and 2, the second only partition 1, each starting
 * at offset 0 and ending at Long.MAX_VALUE.
 */
@Test
public void testMultiTask() throws Exception {
    supervisor = getTestableSupervisor(1, 2, true, "PT1H", null, null);
    addSomeEvents(1);

    final Capture<KafkaIndexTask> queuedTasks = Capture.newInstance(CaptureType.ALL);

    EasyMock.expect(taskMaster.getTaskQueue())
            .andReturn(Optional.of(taskQueue))
            .anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner())
            .andReturn(Optional.absent())
            .anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE))
            .andReturn(ImmutableList.of())
            .anyTimes();
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE))
            .andReturn(new KafkaDataSourceMetadata(null))
            .anyTimes();
    // Exactly two tasks are expected to be queued by the single run below.
    EasyMock.expect(taskQueue.add(EasyMock.capture(queuedTasks)))
            .andReturn(true)
            .times(2);

    replayAll();
    supervisor.start();
    supervisor.runInternal();
    verifyAll();

    // First queued task: two partitions (0 and 2).
    final KafkaIndexTask firstTask = queuedTasks.getValues().get(0);
    final Map<Integer, Long> firstStart =
        firstTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap();
    final Map<Integer, Long> firstEnd =
        firstTask.getIOConfig().getEndSequenceNumbers().getPartitionSequenceNumberMap();
    Assert.assertEquals(2, firstStart.size());
    Assert.assertEquals(2, firstEnd.size());
    Assert.assertEquals(0L, firstStart.get(0).longValue());
    Assert.assertEquals(Long.MAX_VALUE, firstEnd.get(0).longValue());
    Assert.assertEquals(0L, firstStart.get(2).longValue());
    Assert.assertEquals(Long.MAX_VALUE, firstEnd.get(2).longValue());

    // Second queued task: a single partition (1).
    final KafkaIndexTask secondTask = queuedTasks.getValues().get(1);
    final Map<Integer, Long> secondStart =
        secondTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap();
    final Map<Integer, Long> secondEnd =
        secondTask.getIOConfig().getEndSequenceNumbers().getPartitionSequenceNumberMap();
    Assert.assertEquals(1, secondStart.size());
    Assert.assertEquals(1, secondEnd.size());
    Assert.assertEquals(0L, secondStart.get(1).longValue());
    Assert.assertEquals(Long.MAX_VALUE, secondEnd.get(1).longValue());
}
Also used : KafkaIndexTask(org.apache.druid.indexing.kafka.KafkaIndexTask) KafkaDataSourceMetadata(org.apache.druid.indexing.kafka.KafkaDataSourceMetadata) Test(org.junit.Test)

Example 14 with KafkaDataSourceMetadata

use of org.apache.druid.indexing.kafka.KafkaDataSourceMetadata in project druid by druid-io.

the class KafkaSupervisorTest method testKillIncompatibleTasks.

/**
 * Starts a supervisor variant whose isTaskCurrent check always fails while one task ("id1") is reported as
 * active and running. The mocks require that "id1" is stopped exactly once via stopAsync("id1", false) and
 * that two tasks are added to the task queue.
 */
@Test
public void testKillIncompatibleTasks() throws Exception {
    // isTaskCurrent always returns false for this supervisor, so it is expected to kill its tasks
    int numReplicas = 2;
    supervisor = getTestableSupervisorCustomIsTaskCurrent(
        numReplicas,
        1,
        true,
        "PT1H",
        new Period("P1D"),
        new Period("P1D"),
        false,
        false
    );
    addSomeEvents(1);

    // The pre-existing task covers partitions 0 and 2 of "topic", from offset 0 with an open end.
    final SeekableStreamStartSequenceNumbers<Integer, Long> startOffsets =
        new SeekableStreamStartSequenceNumbers<>("topic", ImmutableMap.of(0, 0L, 2, 0L), ImmutableSet.of());
    final SeekableStreamEndSequenceNumbers<Integer, Long> endOffsets =
        new SeekableStreamEndSequenceNumbers<>("topic", ImmutableMap.of(0, Long.MAX_VALUE, 2, Long.MAX_VALUE));
    final Task staleTask = createKafkaIndexTask(
        "id1",
        DATASOURCE,
        0,
        startOffsets,
        endOffsets,
        null,
        null,
        supervisor.getTuningConfig()
    );
    final List<Task> activeTasks = ImmutableList.of(staleTask);

    EasyMock.expect(taskMaster.getTaskQueue())
            .andReturn(Optional.of(taskQueue))
            .anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner())
            .andReturn(Optional.of(taskRunner))
            .anyTimes();
    EasyMock.expect(taskRunner.getRunningTasks())
            .andReturn(Collections.emptyList())
            .anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE))
            .andReturn(activeTasks)
            .anyTimes();
    EasyMock.expect(taskStorage.getStatus("id1"))
            .andReturn(Optional.of(TaskStatus.running("id1")))
            .anyTimes();
    EasyMock.expect(taskStorage.getTask("id1"))
            .andReturn(Optional.of(staleTask))
            .anyTimes();
    EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString()))
            .andReturn(Futures.immediateFuture(Status.NOT_STARTED))
            .anyTimes();
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString()))
            .andReturn(Futures.immediateFuture(DateTimes.nowUtc()))
            .anyTimes();
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE))
            .andReturn(new KafkaDataSourceMetadata(null))
            .anyTimes();
    // The incompatible task must be stopped exactly once.
    EasyMock.expect(taskClient.stopAsync("id1", false))
            .andReturn(Futures.immediateFuture(true));
    taskRunner.registerListener(
        EasyMock.anyObject(TaskRunnerListener.class),
        EasyMock.anyObject(Executor.class)
    );
    // Two tasks are expected to be queued during the run.
    EasyMock.expect(taskQueue.add(EasyMock.anyObject(Task.class)))
            .andReturn(true)
            .times(2);

    replayAll();
    supervisor.start();
    supervisor.runInternal();
    verifyAll();
}
Also used : TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) KafkaIndexTask(org.apache.druid.indexing.kafka.KafkaIndexTask) Task(org.apache.druid.indexing.common.task.Task) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) Executor(java.util.concurrent.Executor) Period(org.joda.time.Period) KafkaDataSourceMetadata(org.apache.druid.indexing.kafka.KafkaDataSourceMetadata) Test(org.junit.Test)

Example 15 with KafkaDataSourceMetadata

use of org.apache.druid.indexing.kafka.KafkaDataSourceMetadata in project druid by druid-io.

the class KafkaSupervisorTest method testRequeueAdoptedTaskWhenFailed.

/**
 * Two-phase supervisor test around a pre-existing task "id1".
 *
 * Phase 1: "id1" (created with the `now` and `maxi` timestamps) is reported as active, running and READING.
 * One supervisor run is expected to queue exactly one additional task, whose minimumMessageTime must equal `now`.
 *
 * Phase 2: only the task queued in phase 1 is reported as active, and the status of "id1" is reported as
 * failed. The next run is expected to queue exactly one new task that has a different id, the same base
 * sequence name as "id1", and the same minimum/maximum message times (`now` / `maxi`).
 */
@Test
public void testRequeueAdoptedTaskWhenFailed() throws Exception {
    supervisor = getTestableSupervisor(2, 1, true, "PT1H", null, null);
    addSomeEvents(1);
    DateTime now = DateTimes.nowUtc();
    DateTime maxi = now.plusMinutes(60);
    // `now` and `maxi` are later compared against the minimum and maximum message time of the queued tasks.
    Task id1 = createKafkaIndexTask("id1", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>("topic", ImmutableMap.of(0, 0L, 2, 0L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>("topic", ImmutableMap.of(0, Long.MAX_VALUE, 2, Long.MAX_VALUE)), now, maxi, supervisor.getTuningConfig());
    List<Task> existingTasks = ImmutableList.of(id1);
    // Default capture: holds the single task expected to be queued in phase 1.
    Capture<Task> captured = Capture.newInstance();
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(existingTasks).anyTimes();
    EasyMock.expect(taskStorage.getStatus("id1")).andReturn(Optional.of(TaskStatus.running("id1"))).anyTimes();
    EasyMock.expect(taskStorage.getTask("id1")).andReturn(Optional.of(id1)).anyTimes();
    EasyMock.expect(taskClient.getStatusAsync("id1")).andReturn(Futures.immediateFuture(Status.READING));
    EasyMock.expect(taskClient.getStartTimeAsync("id1")).andReturn(Futures.immediateFuture(now)).anyTimes();
    // Phase 1 expectation: exactly one task is queued.
    EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true);
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KafkaDataSourceMetadata(null)).anyTimes();
    // The same checkpoint map is reused for every getCheckpointsAsync expectation in both phases.
    TreeMap<Integer, Map<Integer, Long>> checkpoints = new TreeMap<>();
    checkpoints.put(0, ImmutableMap.of(0, 0L, 2, 0L));
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("id1"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints)).times(2);
    taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
    replayAll();
    supervisor.start();
    supervisor.runInternal();
    verifyAll();
    // check that replica tasks are created with the same minimumMessageTime as tasks inherited from another supervisor
    Assert.assertEquals(now, ((KafkaIndexTask) captured.getValue()).getIOConfig().getMinimumMessageTime().get());
    // test that a task failing causes a new task to be re-queued with the same parameters
    String runningTaskId = captured.getValue().getId();
    Capture<Task> aNewTaskCapture = Capture.newInstance();
    KafkaIndexTask iHaveFailed = (KafkaIndexTask) existingTasks.get(0);
    // Phase 2: only these three mocks are reset and re-recorded; the other mocks are not reset here.
    EasyMock.reset(taskStorage);
    EasyMock.reset(taskQueue);
    EasyMock.reset(taskClient);
    // for the newly created replica task
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-0"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints)).times(2);
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("id1"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints)).times(1);
    // Only the task queued in phase 1 is reported as active now; "id1" is no longer in the list.
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of(captured.getValue())).anyTimes();
    // The status of "id1" is reported as failed (expected to be queried once).
    EasyMock.expect(taskStorage.getStatus(iHaveFailed.getId())).andReturn(Optional.of(TaskStatus.failure(iHaveFailed.getId(), "Dummy task status failure err message")));
    EasyMock.expect(taskStorage.getStatus(runningTaskId)).andReturn(Optional.of(TaskStatus.running(runningTaskId))).anyTimes();
    EasyMock.expect(taskStorage.getTask(iHaveFailed.getId())).andReturn(Optional.of(iHaveFailed)).anyTimes();
    EasyMock.expect(taskStorage.getTask(runningTaskId)).andReturn(Optional.of(captured.getValue())).anyTimes();
    EasyMock.expect(taskClient.getStatusAsync(runningTaskId)).andReturn(Futures.immediateFuture(Status.READING));
    EasyMock.expect(taskClient.getStartTimeAsync(runningTaskId)).andReturn(Futures.immediateFuture(now)).anyTimes();
    // Phase 2 expectation: exactly one replacement task is queued.
    EasyMock.expect(taskQueue.add(EasyMock.capture(aNewTaskCapture))).andReturn(true);
    EasyMock.replay(taskStorage);
    EasyMock.replay(taskQueue);
    EasyMock.replay(taskClient);
    supervisor.runInternal();
    verifyAll();
    // The replacement is a distinct task that continues the failed task's sequence.
    Assert.assertNotEquals(iHaveFailed.getId(), aNewTaskCapture.getValue().getId());
    Assert.assertEquals(iHaveFailed.getIOConfig().getBaseSequenceName(), ((KafkaIndexTask) aNewTaskCapture.getValue()).getIOConfig().getBaseSequenceName());
    // check that failed tasks are recreated with the same minimumMessageTime as the task it replaced, even if that
    // task came from another supervisor
    Assert.assertEquals(now, ((KafkaIndexTask) aNewTaskCapture.getValue()).getIOConfig().getMinimumMessageTime().get());
    Assert.assertEquals(maxi, ((KafkaIndexTask) aNewTaskCapture.getValue()).getIOConfig().getMaximumMessageTime().get());
}
Also used : TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) KafkaIndexTask(org.apache.druid.indexing.kafka.KafkaIndexTask) Task(org.apache.druid.indexing.common.task.Task) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) TreeMap(java.util.TreeMap) DateTime(org.joda.time.DateTime) Executor(java.util.concurrent.Executor) KafkaIndexTask(org.apache.druid.indexing.kafka.KafkaIndexTask) KafkaDataSourceMetadata(org.apache.druid.indexing.kafka.KafkaDataSourceMetadata) Map(java.util.Map) TreeMap(java.util.TreeMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Test(org.junit.Test)

Aggregations

KafkaDataSourceMetadata (org.apache.druid.indexing.kafka.KafkaDataSourceMetadata): 33, Test (org.junit.Test): 33, KafkaIndexTask (org.apache.druid.indexing.kafka.KafkaIndexTask): 30, Executor (java.util.concurrent.Executor): 25, TaskRunnerListener (org.apache.druid.indexing.overlord.TaskRunnerListener): 25, RealtimeIndexTask (org.apache.druid.indexing.common.task.RealtimeIndexTask): 20, Task (org.apache.druid.indexing.common.task.Task): 20, HashMap (java.util.HashMap): 17, ImmutableMap (com.google.common.collect.ImmutableMap): 16, Map (java.util.Map): 16, TreeMap (java.util.TreeMap): 16, ArrayList (java.util.ArrayList): 10, Collection (java.util.Collection): 10, TaskLocation (org.apache.druid.indexer.TaskLocation): 10, KafkaIndexTaskIOConfig (org.apache.druid.indexing.kafka.KafkaIndexTaskIOConfig): 9, DateTime (org.joda.time.DateTime): 7, Period (org.joda.time.Period): 5, TaskReportData (org.apache.druid.indexing.seekablestream.supervisor.TaskReportData): 3, SeekableStreamStartSequenceNumbers (org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers): 2, File (java.io.File): 1