Search in sources :

Example 6 with KinesisIndexTaskIOConfig

use of org.apache.druid.indexing.kinesis.KinesisIndexTaskIOConfig in project druid by apache.

the class KinesisSupervisorTest method testBeginPublishAndQueueNextTasks.

@Test
public void testBeginPublishAndQueueNextTasks() throws Exception {
    final TaskLocation location = new TaskLocation("testHost", 1234, -1);
    supervisor = getTestableSupervisor(2, 2, true, "PT1M", null, null);
    supervisorRecordSupplier.assign(EasyMock.anyObject());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID1, SHARD_ID0)).anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getAssignment()).andReturn(ImmutableSet.of(SHARD1_PARTITION, SHARD0_PARTITION)).anyTimes();
    supervisorRecordSupplier.seekToLatest(EasyMock.anyObject());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(SHARD1_PARTITION)).andReturn("12").anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(SHARD0_PARTITION)).andReturn("1").anyTimes();
    supervisorRecordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
    EasyMock.expectLastCall().anyTimes();
    final Capture<Task> firstTasks = Capture.newInstance(CaptureType.ALL);
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KinesisDataSourceMetadata(null)).anyTimes();
    EasyMock.expect(taskQueue.add(EasyMock.capture(firstTasks))).andReturn(true).times(4);
    taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
    replayAll();
    supervisor.start();
    supervisor.runInternal();
    verifyAll();
    final List<Task> tasks = firstTasks.getValues();
    Collection workItems = new ArrayList<>();
    for (Task task : tasks) {
        workItems.add(new TestTaskRunnerWorkItem(task, null, location));
    }
    EasyMock.reset(taskStorage, taskRunner, taskClient, taskQueue);
    final Capture<Task> secondTasks = Capture.newInstance(CaptureType.ALL);
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(tasks).anyTimes();
    for (Task task : tasks) {
        EasyMock.expect(taskStorage.getStatus(task.getId())).andReturn(Optional.of(TaskStatus.running(task.getId()))).anyTimes();
        EasyMock.expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
    }
    EasyMock.expect(taskRunner.getRunningTasks()).andReturn(workItems).anyTimes();
    EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.READING)).anyTimes();
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.contains("sequenceName-0"))).andReturn(Futures.immediateFuture(DateTimes.nowUtc().minusMinutes(2))).andReturn(Futures.immediateFuture(DateTimes.nowUtc()));
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.contains("sequenceName-1"))).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).times(2);
    EasyMock.expect(taskClient.pauseAsync(EasyMock.contains("sequenceName-0"))).andReturn(Futures.immediateFuture(ImmutableMap.of(SHARD_ID1, "1"))).andReturn(Futures.immediateFuture(ImmutableMap.of(SHARD_ID1, "3")));
    EasyMock.expect(taskClient.setEndOffsetsAsync(EasyMock.contains("sequenceName-0"), EasyMock.eq(ImmutableMap.of(SHARD_ID1, "3")), EasyMock.eq(true))).andReturn(Futures.immediateFuture(true)).times(2);
    EasyMock.expect(taskQueue.add(EasyMock.capture(secondTasks))).andReturn(true).times(2);
    TreeMap<Integer, Map<String, String>> checkpoints1 = new TreeMap<>();
    checkpoints1.put(0, ImmutableMap.of(SHARD_ID1, "0"));
    TreeMap<Integer, Map<String, String>> checkpoints2 = new TreeMap<>();
    checkpoints2.put(0, ImmutableMap.of(SHARD_ID0, "0"));
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-0"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints1)).times(2);
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-1"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints2)).times(2);
    EasyMock.replay(taskStorage, taskRunner, taskClient, taskQueue);
    supervisor.runInternal();
    verifyAll();
    for (Task task : secondTasks.getValues()) {
        KinesisIndexTask kinesisIndexTask = (KinesisIndexTask) task;
        Assert.assertEquals(dataSchema, kinesisIndexTask.getDataSchema());
        Assert.assertEquals(tuningConfig.convertToTaskTuningConfig(), kinesisIndexTask.getTuningConfig());
        KinesisIndexTaskIOConfig taskConfig = kinesisIndexTask.getIOConfig();
        Assert.assertEquals("sequenceName-0", taskConfig.getBaseSequenceName());
        Assert.assertTrue("isUseTransaction", taskConfig.isUseTransaction());
        Assert.assertEquals(STREAM, taskConfig.getStartSequenceNumbers().getStream());
        Assert.assertEquals("3", taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(SHARD_ID1));
        // start sequenceNumbers should be exclusive for the second batch of tasks
        Assert.assertEquals(ImmutableSet.of(SHARD_ID1), ((KinesisIndexTask) task).getIOConfig().getStartSequenceNumbers().getExclusivePartitions());
    }
}
Also used : TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) Task(org.apache.druid.indexing.common.task.Task) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) KinesisIndexTask(org.apache.druid.indexing.kinesis.KinesisIndexTask) ArrayList(java.util.ArrayList) TreeMap(java.util.TreeMap) TaskLocation(org.apache.druid.indexer.TaskLocation) KinesisIndexTask(org.apache.druid.indexing.kinesis.KinesisIndexTask) Executor(java.util.concurrent.Executor) KinesisDataSourceMetadata(org.apache.druid.indexing.kinesis.KinesisDataSourceMetadata) Collection(java.util.Collection) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) KinesisIndexTaskIOConfig(org.apache.druid.indexing.kinesis.KinesisIndexTaskIOConfig) Test(org.junit.Test)

Example 7 with KinesisIndexTaskIOConfig

use of org.apache.druid.indexing.kinesis.KinesisIndexTaskIOConfig in project druid by apache.

the class KinesisSupervisorTest method testKillUnresponsiveTasksWhileSettingEndOffsets.

@Test
public void testKillUnresponsiveTasksWhileSettingEndOffsets() throws Exception {
    final TaskLocation location = new TaskLocation("testHost", 1234, -1);
    supervisor = getTestableSupervisor(2, 2, true, "PT1M", null, null);
    supervisorRecordSupplier.assign(EasyMock.anyObject());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID1, SHARD_ID0)).anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getAssignment()).andReturn(ImmutableSet.of(SHARD1_PARTITION, SHARD0_PARTITION)).anyTimes();
    supervisorRecordSupplier.seekToLatest(EasyMock.anyObject());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(SHARD1_PARTITION)).andReturn("12").anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(SHARD0_PARTITION)).andReturn("1").anyTimes();
    supervisorRecordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
    EasyMock.expectLastCall().anyTimes();
    Capture<Task> captured = Capture.newInstance(CaptureType.ALL);
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KinesisDataSourceMetadata(null)).anyTimes();
    EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true).times(4);
    taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
    replayAll();
    supervisor.start();
    supervisor.runInternal();
    verifyAll();
    List<Task> tasks = captured.getValues();
    Collection workItems = new ArrayList<>();
    for (Task task : tasks) {
        workItems.add(new TestTaskRunnerWorkItem(task, null, location));
    }
    EasyMock.reset(taskStorage, taskRunner, taskClient, taskQueue);
    TreeMap<Integer, Map<String, String>> checkpoints1 = new TreeMap<>();
    checkpoints1.put(0, ImmutableMap.of(SHARD_ID1, "0"));
    TreeMap<Integer, Map<String, String>> checkpoints2 = new TreeMap<>();
    checkpoints2.put(0, ImmutableMap.of(SHARD_ID0, "0"));
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-0"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints1)).times(2);
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-1"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints2)).times(2);
    captured = Capture.newInstance(CaptureType.ALL);
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(tasks).anyTimes();
    for (Task task : tasks) {
        EasyMock.expect(taskStorage.getStatus(task.getId())).andReturn(Optional.of(TaskStatus.running(task.getId()))).anyTimes();
        EasyMock.expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
    }
    EasyMock.expect(taskRunner.getRunningTasks()).andReturn(workItems).anyTimes();
    EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.READING)).anyTimes();
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.contains("sequenceName-0"))).andReturn(Futures.immediateFuture(DateTimes.nowUtc().minusMinutes(2))).andReturn(Futures.immediateFuture(DateTimes.nowUtc()));
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.contains("sequenceName-1"))).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).times(2);
    EasyMock.expect(taskClient.pauseAsync(EasyMock.contains("sequenceName-0"))).andReturn(Futures.immediateFuture(ImmutableMap.of(SHARD_ID1, "1"))).andReturn(Futures.immediateFuture(ImmutableMap.of(SHARD_ID1, "3")));
    EasyMock.expect(taskClient.setEndOffsetsAsync(EasyMock.contains("sequenceName-0"), EasyMock.eq(ImmutableMap.of(SHARD_ID1, "3")), EasyMock.eq(true))).andReturn(Futures.immediateFailedFuture(new RuntimeException())).times(2);
    taskQueue.shutdown(EasyMock.contains("sequenceName-0"), EasyMock.eq("Task [%s] failed to respond to [set end offsets] in a timely manner, killing task"), EasyMock.contains("sequenceName-0"));
    EasyMock.expectLastCall().times(2);
    EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true).times(2);
    EasyMock.replay(taskStorage, taskRunner, taskClient, taskQueue);
    supervisor.runInternal();
    verifyAll();
    for (Task task : captured.getValues()) {
        KinesisIndexTaskIOConfig taskConfig = ((KinesisIndexTask) task).getIOConfig();
        Assert.assertEquals("0", taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(SHARD_ID1));
    }
}
Also used : TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) Task(org.apache.druid.indexing.common.task.Task) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) KinesisIndexTask(org.apache.druid.indexing.kinesis.KinesisIndexTask) ArrayList(java.util.ArrayList) TreeMap(java.util.TreeMap) TaskLocation(org.apache.druid.indexer.TaskLocation) KinesisIndexTask(org.apache.druid.indexing.kinesis.KinesisIndexTask) Executor(java.util.concurrent.Executor) KinesisDataSourceMetadata(org.apache.druid.indexing.kinesis.KinesisDataSourceMetadata) Collection(java.util.Collection) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) KinesisIndexTaskIOConfig(org.apache.druid.indexing.kinesis.KinesisIndexTaskIOConfig) Test(org.junit.Test)

Example 8 with KinesisIndexTaskIOConfig

use of org.apache.druid.indexing.kinesis.KinesisIndexTaskIOConfig in project druid by apache.

the class KinesisSupervisorTest method testNoInitialState.

@Test
public void testNoInitialState() throws Exception {
    supervisor = getTestableSupervisor(1, 1, true, "PT1H", null, null);
    supervisorRecordSupplier.assign(EasyMock.anyObject());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID1, SHARD_ID0)).anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getAssignment()).andReturn(ImmutableSet.of(SHARD1_PARTITION, SHARD0_PARTITION)).anyTimes();
    supervisorRecordSupplier.seekToLatest(EasyMock.anyObject());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
    supervisorRecordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
    EasyMock.expectLastCall().anyTimes();
    Capture<KinesisIndexTask> captured = Capture.newInstance();
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KinesisDataSourceMetadata(null)).anyTimes();
    EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true);
    taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
    replayAll();
    supervisor.start();
    supervisor.runInternal();
    verifyAll();
    KinesisIndexTask task = captured.getValue();
    Assert.assertEquals(dataSchema, task.getDataSchema());
    Assert.assertEquals(tuningConfig.convertToTaskTuningConfig(), task.getTuningConfig());
    KinesisIndexTaskIOConfig taskConfig = task.getIOConfig();
    Assert.assertEquals("sequenceName-0", taskConfig.getBaseSequenceName());
    Assert.assertTrue("isUseTransaction", taskConfig.isUseTransaction());
    Assert.assertFalse("minimumMessageTime", taskConfig.getMinimumMessageTime().isPresent());
    Assert.assertFalse("maximumMessageTime", taskConfig.getMaximumMessageTime().isPresent());
    Assert.assertEquals(STREAM, taskConfig.getStartSequenceNumbers().getStream());
    Assert.assertEquals("0", taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(SHARD_ID1));
    Assert.assertEquals("0", taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(SHARD_ID0));
    Assert.assertEquals(STREAM, taskConfig.getEndSequenceNumbers().getStream());
    Assert.assertEquals(KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER, taskConfig.getEndSequenceNumbers().getPartitionSequenceNumberMap().get(SHARD_ID1));
    Assert.assertEquals(KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER, taskConfig.getEndSequenceNumbers().getPartitionSequenceNumberMap().get(SHARD_ID0));
}
Also used : TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) Executor(java.util.concurrent.Executor) KinesisDataSourceMetadata(org.apache.druid.indexing.kinesis.KinesisDataSourceMetadata) KinesisIndexTask(org.apache.druid.indexing.kinesis.KinesisIndexTask) KinesisIndexTaskIOConfig(org.apache.druid.indexing.kinesis.KinesisIndexTaskIOConfig) Test(org.junit.Test)

Example 9 with KinesisIndexTaskIOConfig

use of org.apache.druid.indexing.kinesis.KinesisIndexTaskIOConfig in project druid by apache.

the class KinesisSupervisorTest method testShardMergePhaseTwo.

/**
 * Test task creation after a shard split with a closed shard
 *
 * @param phaseOneTasks List of tasks from the initial phase where only one shard was present
 */
private List<Task> testShardMergePhaseTwo(List<Task> phaseOneTasks) throws Exception {
    EasyMock.reset(indexerMetadataStorageCoordinator);
    EasyMock.reset(taskStorage);
    EasyMock.reset(taskQueue);
    EasyMock.reset(taskClient);
    EasyMock.reset(taskMaster);
    EasyMock.reset(taskRunner);
    EasyMock.reset(supervisorRecordSupplier);
    // first tasks ran, both shard 0 and shard 1 have reached EOS, merged into shard 2
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KinesisDataSourceMetadata(new SeekableStreamEndSequenceNumbers<String, String>(STREAM, ImmutableMap.of(SHARD_ID0, KinesisSequenceNumber.END_OF_SHARD_MARKER, SHARD_ID1, KinesisSequenceNumber.END_OF_SHARD_MARKER)))).anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID0, SHARD_ID1, SHARD_ID2)).anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getAssignment()).andReturn(ImmutableSet.of(SHARD0_PARTITION, SHARD1_PARTITION, SHARD2_PARTITION)).anyTimes();
    supervisorRecordSupplier.seekToLatest(EasyMock.anyObject());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID0))).andReturn(KinesisSequenceNumber.END_OF_SHARD_MARKER).anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID1))).andReturn(KinesisSequenceNumber.END_OF_SHARD_MARKER).anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID2))).andReturn("100").anyTimes();
    supervisorRecordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
    EasyMock.expectLastCall().anyTimes();
    Capture<Task> postMergeCaptured = Capture.newInstance(CaptureType.ALL);
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.NOT_STARTED)).anyTimes();
    Task successfulTask0 = phaseOneTasks.get(0);
    Task successfulTask1 = phaseOneTasks.get(1);
    EasyMock.expect(taskStorage.getStatus(successfulTask0.getId())).andReturn(Optional.of(TaskStatus.success(successfulTask0.getId())));
    EasyMock.expect(taskStorage.getTask(successfulTask0.getId())).andReturn(Optional.of(successfulTask0)).anyTimes();
    EasyMock.expect(taskStorage.getStatus(successfulTask1.getId())).andReturn(Optional.of(TaskStatus.success(successfulTask1.getId())));
    EasyMock.expect(taskStorage.getTask(successfulTask1.getId())).andReturn(Optional.of(successfulTask1)).anyTimes();
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
    EasyMock.expect(taskQueue.add(EasyMock.capture(postMergeCaptured))).andReturn(true).times(1);
    replayAll();
    supervisor.runInternal();
    verifyAll();
    EasyMock.reset(taskStorage);
    EasyMock.reset(taskClient);
    EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.NOT_STARTED)).anyTimes();
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
    TreeMap<Integer, Map<String, String>> checkpointsGroup0 = new TreeMap<>();
    checkpointsGroup0.put(0, ImmutableMap.of(SHARD_ID2, "0", SHARD_ID1, KinesisSequenceNumber.END_OF_SHARD_MARKER));
    // there would be 1 tasks, 1 for each task group, but task group 1 only has closed shards, so no task is created
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-0"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpointsGroup0)).times(1);
    List<Task> postMergeTasks = postMergeCaptured.getValues();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(postMergeTasks).anyTimes();
    for (Task task : postMergeTasks) {
        EasyMock.expect(taskStorage.getStatus(task.getId())).andReturn(Optional.of(TaskStatus.running(task.getId()))).anyTimes();
        EasyMock.expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
    }
    EasyMock.replay(taskStorage);
    EasyMock.replay(taskClient);
    supervisor.runInternal();
    verifyAll();
    // Check that shardId-000000000000 which has hit EOS is not included in the sequences sent to the task for group 0
    SeekableStreamStartSequenceNumbers<String, String> group0ExpectedStartSequenceNumbers = new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID2, "0"), ImmutableSet.of());
    SeekableStreamEndSequenceNumbers<String, String> group0ExpectedEndSequenceNumbers = new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID2, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER));
    Assert.assertEquals(1, postMergeTasks.size());
    KinesisIndexTaskIOConfig group0Config = ((KinesisIndexTask) postMergeTasks.get(0)).getIOConfig();
    Assert.assertEquals((Integer) 0, group0Config.getTaskGroupId());
    Assert.assertEquals(group0ExpectedStartSequenceNumbers, group0Config.getStartSequenceNumbers());
    Assert.assertEquals(group0ExpectedEndSequenceNumbers, group0Config.getEndSequenceNumbers());
    return postMergeTasks;
}
Also used : StreamPartition(org.apache.druid.indexing.seekablestream.common.StreamPartition) Task(org.apache.druid.indexing.common.task.Task) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) KinesisIndexTask(org.apache.druid.indexing.kinesis.KinesisIndexTask) TreeMap(java.util.TreeMap) KinesisIndexTask(org.apache.druid.indexing.kinesis.KinesisIndexTask) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) KinesisDataSourceMetadata(org.apache.druid.indexing.kinesis.KinesisDataSourceMetadata) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) KinesisIndexTaskIOConfig(org.apache.druid.indexing.kinesis.KinesisIndexTaskIOConfig)

Example 10 with KinesisIndexTaskIOConfig

use of org.apache.druid.indexing.kinesis.KinesisIndexTaskIOConfig in project druid by apache.

the class KinesisSupervisorTest method testShardSplitPhaseThree.

/**
 * Test task creation after a shard split with a closed shard, with the closed shards expiring and no longer
 * being returned from record supplier.
 *
 * @param phaseTwoTasks List of tasks from the second phase where closed but not expired shards were present.
 */
private void testShardSplitPhaseThree(List<Task> phaseTwoTasks) throws Exception {
    EasyMock.reset(indexerMetadataStorageCoordinator);
    EasyMock.reset(taskStorage);
    EasyMock.reset(taskQueue);
    EasyMock.reset(taskClient);
    EasyMock.reset(taskMaster);
    EasyMock.reset(taskRunner);
    EasyMock.reset(supervisorRecordSupplier);
    // second set of tasks ran, shard 0 has expired, but shard 1 and 2 have data
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KinesisDataSourceMetadata(new SeekableStreamEndSequenceNumbers<String, String>(STREAM, ImmutableMap.of(SHARD_ID0, KinesisSequenceNumber.END_OF_SHARD_MARKER, SHARD_ID1, "100", SHARD_ID2, "100")))).anyTimes();
    EasyMock.expect(indexerMetadataStorageCoordinator.resetDataSourceMetadata(DATASOURCE, new KinesisDataSourceMetadata(new SeekableStreamEndSequenceNumbers<String, String>(STREAM, ImmutableMap.of(SHARD_ID0, KinesisSequenceNumber.EXPIRED_MARKER, SHARD_ID1, "100", SHARD_ID2, "100"))))).andReturn(true).anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID1, SHARD_ID2)).anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getAssignment()).andReturn(ImmutableSet.of(SHARD1_PARTITION, SHARD2_PARTITION)).anyTimes();
    supervisorRecordSupplier.seekToLatest(EasyMock.anyObject());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID1))).andReturn("200").anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID2))).andReturn("200").anyTimes();
    supervisorRecordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
    EasyMock.expectLastCall().anyTimes();
    Capture<Task> postSplitCaptured = Capture.newInstance(CaptureType.ALL);
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.NOT_STARTED)).anyTimes();
    Task successfulTask0 = phaseTwoTasks.get(0);
    EasyMock.expect(taskStorage.getStatus(successfulTask0.getId())).andReturn(Optional.of(TaskStatus.success(successfulTask0.getId())));
    EasyMock.expect(taskStorage.getTask(successfulTask0.getId())).andReturn(Optional.of(successfulTask0)).anyTimes();
    Task successfulTask1 = phaseTwoTasks.get(1);
    EasyMock.expect(taskStorage.getStatus(successfulTask1.getId())).andReturn(Optional.of(TaskStatus.success(successfulTask1.getId())));
    EasyMock.expect(taskStorage.getTask(successfulTask1.getId())).andReturn(Optional.of(successfulTask1)).anyTimes();
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
    EasyMock.expect(taskQueue.add(EasyMock.capture(postSplitCaptured))).andReturn(true).times(2);
    replayAll();
    supervisor.runInternal();
    verifyAll();
    EasyMock.reset(taskStorage);
    EasyMock.reset(taskClient);
    EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.NOT_STARTED)).anyTimes();
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
    TreeMap<Integer, Map<String, String>> checkpointsGroup0 = new TreeMap<>();
    checkpointsGroup0.put(0, ImmutableMap.of(SHARD_ID2, "100"));
    TreeMap<Integer, Map<String, String>> checkpointsGroup1 = new TreeMap<>();
    checkpointsGroup1.put(1, ImmutableMap.of(SHARD_ID1, "100"));
    // there would be 2 tasks, 1 for each task group
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-0"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpointsGroup0)).times(1);
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-1"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpointsGroup1)).times(1);
    List<Task> postSplitTasks = postSplitCaptured.getValues();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(postSplitTasks).anyTimes();
    for (Task task : postSplitTasks) {
        EasyMock.expect(taskStorage.getStatus(task.getId())).andReturn(Optional.of(TaskStatus.running(task.getId()))).anyTimes();
        EasyMock.expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
    }
    EasyMock.replay(taskStorage);
    EasyMock.replay(taskClient);
    supervisor.runInternal();
    verifyAll();
    // Check that shardId-000000000000 which has hit EOS is not included in the sequences sent to the task for group 0
    SeekableStreamStartSequenceNumbers<String, String> group0ExpectedStartSequenceNumbers = new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "100"), ImmutableSet.of(SHARD_ID1));
    SeekableStreamEndSequenceNumbers<String, String> group0ExpectedEndSequenceNumbers = new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER));
    SeekableStreamStartSequenceNumbers<String, String> group1ExpectedStartSequenceNumbers = new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID2, "100"), ImmutableSet.of(SHARD_ID2));
    SeekableStreamEndSequenceNumbers<String, String> group1ExpectedEndSequenceNumbers = new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID2, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER));
    Assert.assertEquals(2, postSplitTasks.size());
    KinesisIndexTaskIOConfig group0Config = ((KinesisIndexTask) postSplitTasks.get(0)).getIOConfig();
    KinesisIndexTaskIOConfig group1Config = ((KinesisIndexTask) postSplitTasks.get(1)).getIOConfig();
    Assert.assertEquals((Integer) 0, group0Config.getTaskGroupId());
    Assert.assertEquals((Integer) 1, group1Config.getTaskGroupId());
    Assert.assertEquals(group0ExpectedStartSequenceNumbers, group0Config.getStartSequenceNumbers());
    Assert.assertEquals(group0ExpectedEndSequenceNumbers, group0Config.getEndSequenceNumbers());
    Assert.assertEquals(group1ExpectedStartSequenceNumbers, group1Config.getStartSequenceNumbers());
    Assert.assertEquals(group1ExpectedEndSequenceNumbers, group1Config.getEndSequenceNumbers());
    Map<Integer, Set<String>> expectedPartitionGroups = ImmutableMap.of(0, ImmutableSet.of(SHARD_ID1), 1, ImmutableSet.of(SHARD_ID2));
    Assert.assertEquals(expectedPartitionGroups, supervisor.getPartitionGroups());
    ConcurrentHashMap<String, String> expectedPartitionOffsets = new ConcurrentHashMap<>(ImmutableMap.of(SHARD_ID2, "-1", SHARD_ID1, "-1", SHARD_ID0, "-1"));
    Assert.assertEquals(expectedPartitionOffsets, supervisor.getPartitionOffsets());
}
Also used : StreamPartition(org.apache.druid.indexing.seekablestream.common.StreamPartition) Task(org.apache.druid.indexing.common.task.Task) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) KinesisIndexTask(org.apache.druid.indexing.kinesis.KinesisIndexTask) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) TreeMap(java.util.TreeMap) KinesisIndexTask(org.apache.druid.indexing.kinesis.KinesisIndexTask) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) KinesisDataSourceMetadata(org.apache.druid.indexing.kinesis.KinesisDataSourceMetadata) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) KinesisIndexTaskIOConfig(org.apache.druid.indexing.kinesis.KinesisIndexTaskIOConfig)

Aggregations

KinesisIndexTask (org.apache.druid.indexing.kinesis.KinesisIndexTask)24 KinesisIndexTaskIOConfig (org.apache.druid.indexing.kinesis.KinesisIndexTaskIOConfig)24 KinesisDataSourceMetadata (org.apache.druid.indexing.kinesis.KinesisDataSourceMetadata)22 RealtimeIndexTask (org.apache.druid.indexing.common.task.RealtimeIndexTask)18 Task (org.apache.druid.indexing.common.task.Task)18 ImmutableMap (com.google.common.collect.ImmutableMap)16 HashMap (java.util.HashMap)16 Map (java.util.Map)16 TreeMap (java.util.TreeMap)16 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)16 Test (org.junit.Test)14 ArrayList (java.util.ArrayList)12 Executor (java.util.concurrent.Executor)12 TaskRunnerListener (org.apache.druid.indexing.overlord.TaskRunnerListener)12 Collection (java.util.Collection)10 TaskLocation (org.apache.druid.indexer.TaskLocation)10 SeekableStreamEndSequenceNumbers (org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers)8 SeekableStreamStartSequenceNumbers (org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers)8 StreamPartition (org.apache.druid.indexing.seekablestream.common.StreamPartition)8 ImmutableSet (com.google.common.collect.ImmutableSet)4