Search in sources :

Example 6 with StreamPartition

use of org.apache.druid.indexing.seekablestream.common.StreamPartition in project druid by druid-io.

the class KinesisSupervisorTest method testShardSplitPhaseThree.

/**
 * Test task creation after a shard split with a closed shard, with the closed shards expiring and no longer
 * being returned from record supplier.
 * <p>
 * The supervisor is run twice: the first run is expected to submit two new tasks to the task queue, and the
 * second run is performed with those captured tasks reported as active and running. Afterwards the IO configs
 * of the captured tasks, the supervisor's partition groups and its partition offsets are asserted.
 *
 * @param phaseTwoTasks List of tasks from the second phase where closed but not expired shards were present.
 *                      The first two entries are read and reported as successfully completed.
 */
private void testShardSplitPhaseThree(List<Task> phaseTwoTasks) throws Exception {
    // Clear any previously recorded expectations on every mock this phase touches.
    EasyMock.reset(indexerMetadataStorageCoordinator);
    EasyMock.reset(taskStorage);
    EasyMock.reset(taskQueue);
    EasyMock.reset(taskClient);
    EasyMock.reset(taskMaster);
    EasyMock.reset(taskRunner);
    EasyMock.reset(supervisorRecordSupplier);
    // second set of tasks ran, shard 0 has expired, but shard 1 and 2 have data
    // Stored metadata: shard 0 is at END_OF_SHARD_MARKER, shards 1 and 2 are at "100".
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KinesisDataSourceMetadata(new SeekableStreamEndSequenceNumbers<String, String>(STREAM, ImmutableMap.of(SHARD_ID0, KinesisSequenceNumber.END_OF_SHARD_MARKER, SHARD_ID1, "100", SHARD_ID2, "100")))).anyTimes();
    // Allow the metadata to be reset with shard 0 rewritten to EXPIRED_MARKER (shards 1 and 2 unchanged).
    EasyMock.expect(indexerMetadataStorageCoordinator.resetDataSourceMetadata(DATASOURCE, new KinesisDataSourceMetadata(new SeekableStreamEndSequenceNumbers<String, String>(STREAM, ImmutableMap.of(SHARD_ID0, KinesisSequenceNumber.EXPIRED_MARKER, SHARD_ID1, "100", SHARD_ID2, "100"))))).andReturn(true).anyTimes();
    // The record supplier only reports shards 1 and 2; shard 0 is no longer returned.
    EasyMock.expect(supervisorRecordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID1, SHARD_ID2)).anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getAssignment()).andReturn(ImmutableSet.of(SHARD1_PARTITION, SHARD2_PARTITION)).anyTimes();
    // seekToLatest is a void method: invoke it on the mock, then relax the call count via expectLastCall().
    supervisorRecordSupplier.seekToLatest(EasyMock.anyObject());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID1))).andReturn("200").anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID2))).andReturn("200").anyTimes();
    // seek is also void, so it uses the same invoke-then-expectLastCall pattern.
    supervisorRecordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
    EasyMock.expectLastCall().anyTimes();
    // Collects every task handed to taskQueue.add(...) during the supervisor runs below.
    Capture<Task> postSplitCaptured = Capture.newInstance(CaptureType.ALL);
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    // For the first run there are no running or active tasks.
    EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.NOT_STARTED)).anyTimes();
    // Both phase-two tasks are reported by task storage as having completed successfully.
    Task successfulTask0 = phaseTwoTasks.get(0);
    EasyMock.expect(taskStorage.getStatus(successfulTask0.getId())).andReturn(Optional.of(TaskStatus.success(successfulTask0.getId())));
    EasyMock.expect(taskStorage.getTask(successfulTask0.getId())).andReturn(Optional.of(successfulTask0)).anyTimes();
    Task successfulTask1 = phaseTwoTasks.get(1);
    EasyMock.expect(taskStorage.getStatus(successfulTask1.getId())).andReturn(Optional.of(TaskStatus.success(successfulTask1.getId())));
    EasyMock.expect(taskStorage.getTask(successfulTask1.getId())).andReturn(Optional.of(successfulTask1)).anyTimes();
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
    // Exactly two tasks must be submitted to the queue; each one is captured for later inspection.
    EasyMock.expect(taskQueue.add(EasyMock.capture(postSplitCaptured))).andReturn(true).times(2);
    // First supervisor run.
    replayAll();
    supervisor.runInternal();
    verifyAll();
    // Re-arm only taskStorage and taskClient for the second run; the other mocks keep their expectations.
    EasyMock.reset(taskStorage);
    EasyMock.reset(taskClient);
    EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.NOT_STARTED)).anyTimes();
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
    // Checkpoints returned for task ids containing "sequenceName-0" and "sequenceName-1" respectively.
    TreeMap<Integer, Map<String, String>> checkpointsGroup0 = new TreeMap<>();
    checkpointsGroup0.put(0, ImmutableMap.of(SHARD_ID2, "100"));
    TreeMap<Integer, Map<String, String>> checkpointsGroup1 = new TreeMap<>();
    checkpointsGroup1.put(1, ImmutableMap.of(SHARD_ID1, "100"));
    // there would be 2 tasks, 1 for each task group
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-0"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpointsGroup0)).times(1);
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-1"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpointsGroup1)).times(1);
    // The tasks captured during the first run are now reported as the active, running tasks.
    List<Task> postSplitTasks = postSplitCaptured.getValues();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(postSplitTasks).anyTimes();
    for (Task task : postSplitTasks) {
        EasyMock.expect(taskStorage.getStatus(task.getId())).andReturn(Optional.of(TaskStatus.running(task.getId()))).anyTimes();
        EasyMock.expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
    }
    // Second supervisor run.
    EasyMock.replay(taskStorage);
    EasyMock.replay(taskClient);
    supervisor.runInternal();
    verifyAll();
    // Check that shardId-000000000000 which has hit EOS is not included in the sequences sent to the task for group 0
    // Expected: group 0 reads only SHARD_ID1 and group 1 reads only SHARD_ID2, both starting at "100" with the
    // start partition listed in the third constructor argument, and with no end sequence number.
    SeekableStreamStartSequenceNumbers<String, String> group0ExpectedStartSequenceNumbers = new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "100"), ImmutableSet.of(SHARD_ID1));
    SeekableStreamEndSequenceNumbers<String, String> group0ExpectedEndSequenceNumbers = new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER));
    SeekableStreamStartSequenceNumbers<String, String> group1ExpectedStartSequenceNumbers = new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID2, "100"), ImmutableSet.of(SHARD_ID2));
    SeekableStreamEndSequenceNumbers<String, String> group1ExpectedEndSequenceNumbers = new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID2, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER));
    Assert.assertEquals(2, postSplitTasks.size());
    // Captured tasks are inspected in capture order: index 0 must be task group 0, index 1 task group 1.
    KinesisIndexTaskIOConfig group0Config = ((KinesisIndexTask) postSplitTasks.get(0)).getIOConfig();
    KinesisIndexTaskIOConfig group1Config = ((KinesisIndexTask) postSplitTasks.get(1)).getIOConfig();
    Assert.assertEquals((Integer) 0, group0Config.getTaskGroupId());
    Assert.assertEquals((Integer) 1, group1Config.getTaskGroupId());
    Assert.assertEquals(group0ExpectedStartSequenceNumbers, group0Config.getStartSequenceNumbers());
    Assert.assertEquals(group0ExpectedEndSequenceNumbers, group0Config.getEndSequenceNumbers());
    Assert.assertEquals(group1ExpectedStartSequenceNumbers, group1Config.getStartSequenceNumbers());
    Assert.assertEquals(group1ExpectedEndSequenceNumbers, group1Config.getEndSequenceNumbers());
    // Partition groups contain only shards 1 and 2, one per group.
    Map<Integer, Set<String>> expectedPartitionGroups = ImmutableMap.of(0, ImmutableSet.of(SHARD_ID1), 1, ImmutableSet.of(SHARD_ID2));
    Assert.assertEquals(expectedPartitionGroups, supervisor.getPartitionGroups());
    // Partition offsets are expected to hold an entry for all three shards (including shard 0), each "-1".
    ConcurrentHashMap<String, String> expectedPartitionOffsets = new ConcurrentHashMap<>(ImmutableMap.of(SHARD_ID2, "-1", SHARD_ID1, "-1", SHARD_ID0, "-1"));
    Assert.assertEquals(expectedPartitionOffsets, supervisor.getPartitionOffsets());
}
Also used : StreamPartition(org.apache.druid.indexing.seekablestream.common.StreamPartition) Task(org.apache.druid.indexing.common.task.Task) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) KinesisIndexTask(org.apache.druid.indexing.kinesis.KinesisIndexTask) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) TreeMap(java.util.TreeMap) KinesisIndexTask(org.apache.druid.indexing.kinesis.KinesisIndexTask) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) KinesisDataSourceMetadata(org.apache.druid.indexing.kinesis.KinesisDataSourceMetadata) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) KinesisIndexTaskIOConfig(org.apache.druid.indexing.kinesis.KinesisIndexTaskIOConfig)

Example 7 with StreamPartition

use of org.apache.druid.indexing.seekablestream.common.StreamPartition in project druid by druid-io.

the class KinesisSupervisorTest method testShardSplitPhaseTwo.

/**
 * Test task creation after a shard split with a closed shard
 * <p>
 * The supervisor is run twice: the first run is expected to submit two new tasks to the task queue, and the
 * second run is performed with those captured tasks reported as active and running. Afterwards the IO configs
 * of the captured tasks are asserted.
 *
 * @param phaseOneTasks List of tasks from the initial phase where only one shard was present. Only the first
 *                      entry is read; it is reported as successfully completed.
 * @return the tasks captured from {@code taskQueue.add(...)} during this phase (asserted to be exactly two)
 */
private List<Task> testShardSplitPhaseTwo(List<Task> phaseOneTasks) throws Exception {
    // Clear any previously recorded expectations on every mock this phase touches.
    EasyMock.reset(indexerMetadataStorageCoordinator);
    EasyMock.reset(taskStorage);
    EasyMock.reset(taskQueue);
    EasyMock.reset(taskClient);
    EasyMock.reset(taskMaster);
    EasyMock.reset(taskRunner);
    EasyMock.reset(supervisorRecordSupplier);
    // first task ran, its shard 0 has reached EOS
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KinesisDataSourceMetadata(new SeekableStreamEndSequenceNumbers<String, String>(STREAM, ImmutableMap.of(SHARD_ID0, KinesisSequenceNumber.END_OF_SHARD_MARKER)))).anyTimes();
    // The record supplier still reports the closed shard 0 alongside shards 1 and 2.
    EasyMock.expect(supervisorRecordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID0, SHARD_ID1, SHARD_ID2)).anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getAssignment()).andReturn(ImmutableSet.of(SHARD0_PARTITION, SHARD1_PARTITION, SHARD2_PARTITION)).anyTimes();
    // seekToLatest is a void method: invoke it on the mock, then relax the call count via expectLastCall().
    supervisorRecordSupplier.seekToLatest(EasyMock.anyObject());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
    // Latest sequence number: END_OF_SHARD_MARKER for shard 0, "100" for shards 1 and 2.
    EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID0))).andReturn(KinesisSequenceNumber.END_OF_SHARD_MARKER).anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID1))).andReturn("100").anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID2))).andReturn("100").anyTimes();
    // seek is also void, so it uses the same invoke-then-expectLastCall pattern.
    supervisorRecordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
    EasyMock.expectLastCall().anyTimes();
    // Collects every task handed to taskQueue.add(...) during the supervisor runs below.
    Capture<Task> postSplitCaptured = Capture.newInstance(CaptureType.ALL);
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    // For the first run there are no running or active tasks.
    EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.NOT_STARTED)).anyTimes();
    // The phase-one task is reported by task storage as having completed successfully.
    Task successfulTask = phaseOneTasks.get(0);
    EasyMock.expect(taskStorage.getStatus(successfulTask.getId())).andReturn(Optional.of(TaskStatus.success(successfulTask.getId())));
    EasyMock.expect(taskStorage.getTask(successfulTask.getId())).andReturn(Optional.of(successfulTask)).anyTimes();
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
    // Exactly two tasks must be submitted to the queue; each one is captured for later inspection.
    EasyMock.expect(taskQueue.add(EasyMock.capture(postSplitCaptured))).andReturn(true).times(2);
    // First supervisor run.
    replayAll();
    supervisor.runInternal();
    verifyAll();
    // Re-arm only taskStorage and taskClient for the second run; the other mocks keep their expectations.
    EasyMock.reset(taskStorage);
    EasyMock.reset(taskClient);
    EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.NOT_STARTED)).anyTimes();
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
    // Checkpoints returned for task ids containing "sequenceName-0" and "sequenceName-1" respectively.
    TreeMap<Integer, Map<String, String>> checkpointsGroup0 = new TreeMap<>();
    checkpointsGroup0.put(0, ImmutableMap.of(SHARD_ID1, "0"));
    TreeMap<Integer, Map<String, String>> checkpointsGroup1 = new TreeMap<>();
    checkpointsGroup1.put(1, ImmutableMap.of(SHARD_ID2, "0"));
    // there would be 2 tasks, 1 for each task group
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-0"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpointsGroup0)).times(1);
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-1"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpointsGroup1)).times(1);
    // The tasks captured during the first run are now reported as the active, running tasks.
    List<Task> postSplitTasks = postSplitCaptured.getValues();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(postSplitTasks).anyTimes();
    for (Task task : postSplitTasks) {
        EasyMock.expect(taskStorage.getStatus(task.getId())).andReturn(Optional.of(TaskStatus.running(task.getId()))).anyTimes();
        EasyMock.expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
    }
    // Second supervisor run.
    EasyMock.replay(taskStorage);
    EasyMock.replay(taskClient);
    supervisor.runInternal();
    verifyAll();
    // Check that shardId-000000000000 which has hit EOS is not included in the sequences sent to the task for group 0
    // Expected: group 0 reads only SHARD_ID1 and group 1 reads only SHARD_ID2, both starting at "0" with an
    // empty set as the third constructor argument, and with no end sequence number.
    SeekableStreamStartSequenceNumbers<String, String> group0ExpectedStartSequenceNumbers = new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "0"), ImmutableSet.of());
    SeekableStreamEndSequenceNumbers<String, String> group0ExpectedEndSequenceNumbers = new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER));
    SeekableStreamStartSequenceNumbers<String, String> group1ExpectedStartSequenceNumbers = new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID2, "0"), ImmutableSet.of());
    SeekableStreamEndSequenceNumbers<String, String> group1ExpectedEndSequenceNumbers = new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID2, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER));
    Assert.assertEquals(2, postSplitTasks.size());
    // Captured tasks are inspected in capture order: index 0 must be task group 0, index 1 task group 1.
    KinesisIndexTaskIOConfig group0Config = ((KinesisIndexTask) postSplitTasks.get(0)).getIOConfig();
    KinesisIndexTaskIOConfig group1Config = ((KinesisIndexTask) postSplitTasks.get(1)).getIOConfig();
    Assert.assertEquals((Integer) 0, group0Config.getTaskGroupId());
    Assert.assertEquals((Integer) 1, group1Config.getTaskGroupId());
    Assert.assertEquals(group0ExpectedStartSequenceNumbers, group0Config.getStartSequenceNumbers());
    Assert.assertEquals(group0ExpectedEndSequenceNumbers, group0Config.getEndSequenceNumbers());
    Assert.assertEquals(group1ExpectedStartSequenceNumbers, group1Config.getStartSequenceNumbers());
    Assert.assertEquals(group1ExpectedEndSequenceNumbers, group1Config.getEndSequenceNumbers());
    return postSplitTasks;
}
Also used : StreamPartition(org.apache.druid.indexing.seekablestream.common.StreamPartition) Task(org.apache.druid.indexing.common.task.Task) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) KinesisIndexTask(org.apache.druid.indexing.kinesis.KinesisIndexTask) TreeMap(java.util.TreeMap) KinesisIndexTask(org.apache.druid.indexing.kinesis.KinesisIndexTask) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) KinesisDataSourceMetadata(org.apache.druid.indexing.kinesis.KinesisDataSourceMetadata) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) KinesisIndexTaskIOConfig(org.apache.druid.indexing.kinesis.KinesisIndexTaskIOConfig)

Example 8 with StreamPartition

use of org.apache.druid.indexing.seekablestream.common.StreamPartition in project druid by druid-io.

the class KinesisSupervisorTest method testShardMergePhaseTwo.

/**
 * Test task creation after a shard merge, where the two original shards (shard 0 and shard 1) have both been
 * closed and merged into shard 2.
 * <p>
 * The supervisor is run twice: the first run is expected to submit a single new task to the task queue, and the
 * second run is performed with that captured task reported as active and running. Afterwards the IO config of
 * the captured task is asserted to cover only shard 2.
 *
 * @param phaseOneTasks List of tasks from the initial phase, before the merge. The first two entries are read
 *                      and reported as successfully completed.
 * @return the tasks captured from {@code taskQueue.add(...)} during this phase (asserted to be exactly one)
 */
private List<Task> testShardMergePhaseTwo(List<Task> phaseOneTasks) throws Exception {
    // Clear any previously recorded expectations on every mock this phase touches.
    EasyMock.reset(indexerMetadataStorageCoordinator);
    EasyMock.reset(taskStorage);
    EasyMock.reset(taskQueue);
    EasyMock.reset(taskClient);
    EasyMock.reset(taskMaster);
    EasyMock.reset(taskRunner);
    EasyMock.reset(supervisorRecordSupplier);
    // first tasks ran, both shard 0 and shard 1 have reached EOS, merged into shard 2
    EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KinesisDataSourceMetadata(new SeekableStreamEndSequenceNumbers<String, String>(STREAM, ImmutableMap.of(SHARD_ID0, KinesisSequenceNumber.END_OF_SHARD_MARKER, SHARD_ID1, KinesisSequenceNumber.END_OF_SHARD_MARKER)))).anyTimes();
    // The record supplier still reports the closed shards 0 and 1 alongside the merged shard 2.
    EasyMock.expect(supervisorRecordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID0, SHARD_ID1, SHARD_ID2)).anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getAssignment()).andReturn(ImmutableSet.of(SHARD0_PARTITION, SHARD1_PARTITION, SHARD2_PARTITION)).anyTimes();
    // seekToLatest is a void method: invoke it on the mock, then relax the call count via expectLastCall().
    supervisorRecordSupplier.seekToLatest(EasyMock.anyObject());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
    // Latest sequence number: END_OF_SHARD_MARKER for shards 0 and 1, "100" for shard 2.
    EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID0))).andReturn(KinesisSequenceNumber.END_OF_SHARD_MARKER).anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID1))).andReturn(KinesisSequenceNumber.END_OF_SHARD_MARKER).anyTimes();
    EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID2))).andReturn("100").anyTimes();
    // seek is also void, so it uses the same invoke-then-expectLastCall pattern.
    supervisorRecordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
    EasyMock.expectLastCall().anyTimes();
    // Collects every task handed to taskQueue.add(...) during the supervisor runs below.
    Capture<Task> postMergeCaptured = Capture.newInstance(CaptureType.ALL);
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
    // For the first run there are no running or active tasks.
    EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
    EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.NOT_STARTED)).anyTimes();
    // Both phase-one tasks are reported by task storage as having completed successfully.
    Task successfulTask0 = phaseOneTasks.get(0);
    Task successfulTask1 = phaseOneTasks.get(1);
    EasyMock.expect(taskStorage.getStatus(successfulTask0.getId())).andReturn(Optional.of(TaskStatus.success(successfulTask0.getId())));
    EasyMock.expect(taskStorage.getTask(successfulTask0.getId())).andReturn(Optional.of(successfulTask0)).anyTimes();
    EasyMock.expect(taskStorage.getStatus(successfulTask1.getId())).andReturn(Optional.of(TaskStatus.success(successfulTask1.getId())));
    EasyMock.expect(taskStorage.getTask(successfulTask1.getId())).andReturn(Optional.of(successfulTask1)).anyTimes();
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
    // Exactly one task must be submitted to the queue; it is captured for later inspection.
    EasyMock.expect(taskQueue.add(EasyMock.capture(postMergeCaptured))).andReturn(true).times(1);
    // First supervisor run.
    replayAll();
    supervisor.runInternal();
    verifyAll();
    // Re-arm only taskStorage and taskClient for the second run; the other mocks keep their expectations.
    EasyMock.reset(taskStorage);
    EasyMock.reset(taskClient);
    EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.NOT_STARTED)).anyTimes();
    EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
    // Checkpoints returned for task ids containing "sequenceName-0": shard 2 at "0", shard 1 at end-of-shard.
    TreeMap<Integer, Map<String, String>> checkpointsGroup0 = new TreeMap<>();
    checkpointsGroup0.put(0, ImmutableMap.of(SHARD_ID2, "0", SHARD_ID1, KinesisSequenceNumber.END_OF_SHARD_MARKER));
    // only 1 task is expected: there is normally one task per task group, but task group 1 only has closed
    // shards, so no task is created for it
    EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-0"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpointsGroup0)).times(1);
    // The task captured during the first run is now reported as the active, running task.
    List<Task> postMergeTasks = postMergeCaptured.getValues();
    EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(postMergeTasks).anyTimes();
    for (Task task : postMergeTasks) {
        EasyMock.expect(taskStorage.getStatus(task.getId())).andReturn(Optional.of(TaskStatus.running(task.getId()))).anyTimes();
        EasyMock.expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
    }
    // Second supervisor run.
    EasyMock.replay(taskStorage);
    EasyMock.replay(taskClient);
    supervisor.runInternal();
    verifyAll();
    // Check that the closed shards (SHARD_ID0 and SHARD_ID1), which have hit EOS, are not included in the
    // sequences sent to the task for group 0: only SHARD_ID2 is expected, starting at "0" with no end sequence.
    SeekableStreamStartSequenceNumbers<String, String> group0ExpectedStartSequenceNumbers = new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID2, "0"), ImmutableSet.of());
    SeekableStreamEndSequenceNumbers<String, String> group0ExpectedEndSequenceNumbers = new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID2, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER));
    Assert.assertEquals(1, postMergeTasks.size());
    KinesisIndexTaskIOConfig group0Config = ((KinesisIndexTask) postMergeTasks.get(0)).getIOConfig();
    Assert.assertEquals((Integer) 0, group0Config.getTaskGroupId());
    Assert.assertEquals(group0ExpectedStartSequenceNumbers, group0Config.getStartSequenceNumbers());
    Assert.assertEquals(group0ExpectedEndSequenceNumbers, group0Config.getEndSequenceNumbers());
    return postMergeTasks;
}
Also used : StreamPartition(org.apache.druid.indexing.seekablestream.common.StreamPartition) Task(org.apache.druid.indexing.common.task.Task) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) KinesisIndexTask(org.apache.druid.indexing.kinesis.KinesisIndexTask) TreeMap(java.util.TreeMap) KinesisIndexTask(org.apache.druid.indexing.kinesis.KinesisIndexTask) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) KinesisDataSourceMetadata(org.apache.druid.indexing.kinesis.KinesisDataSourceMetadata) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) KinesisIndexTaskIOConfig(org.apache.druid.indexing.kinesis.KinesisIndexTaskIOConfig)

Example 9 with StreamPartition

use of org.apache.druid.indexing.seekablestream.common.StreamPartition in project druid by druid-io.

the class KinesisRecordSupplier method getSequenceNumber.

/**
 * Resolves a sequence number for {@code partition} by opening a shard iterator of the requested
 * {@link ShardIteratorType} and repeatedly fetching up to {@link #GET_SEQUENCE_NUMBER_RECORD_COUNT} records
 * until a non-empty batch arrives, the shard iterator becomes null, or {@code fetchSequenceNumberTimeout}
 * milliseconds have elapsed.
 * <p>
 * As with the rest of the sequence-number lookups, this does not touch the {@link PartitionResource}s assigned
 * to the supplier; it only talks to the Kinesis client.
 *
 * @param partition    stream and shard to query
 * @param iteratorEnum kind of shard iterator to open
 * @return the sequence number of the first record fetched;
 *         {@link KinesisSequenceNumber#END_OF_SHARD_MARKER} if the shard iterator became null;
 *         or {@code null} if the supplier was closed or the timeout elapsed without seeing a record
 */
@Nullable
private String getSequenceNumber(StreamPartition<String> partition, ShardIteratorType iteratorEnum) {
    return wrapExceptions(() -> {
        String iterator = kinesis
            .getShardIterator(partition.getStream(), partition.getPartitionId(), iteratorEnum.toString())
            .getShardIterator();
        final long deadline = System.currentTimeMillis() + fetchSequenceNumberTimeout;
        GetRecordsResult lastResult = null;

        while (iterator != null && System.currentTimeMillis() < deadline) {
            if (closed) {
                log.info("KinesisRecordSupplier closed while fetching sequenceNumber");
                return null;
            }

            final GetRecordsRequest request = new GetRecordsRequest()
                .withShardIterator(iterator)
                .withLimit(GET_SEQUENCE_NUMBER_RECORD_COUNT);

            lastResult = RetryUtils.retry(
                () -> kinesis.getRecords(request),
                failure -> {
                    // Throttling is always retried (after logging a hint for the operator).
                    if (failure instanceof ProvisionedThroughputExceededException) {
                        log.warn(failure, "encountered ProvisionedThroughputExceededException while fetching records, this means " + "that the request rate for the stream is too high, or the requested data is too large for " + "the available throughput. Reduce the frequency or size of your requests. Consider increasing " + "the number of shards to increase throughput.");
                        return true;
                    }
                    // Other AWS client errors are retried only when classified as recoverable.
                    return failure instanceof AmazonClientException
                           && AWSClientUtil.isClientExceptionRecoverable((AmazonClientException) failure);
                },
                GET_SEQUENCE_NUMBER_RETRY_COUNT
            );

            final List<Record> batch = lastResult.getRecords();
            if (!batch.isEmpty()) {
                return batch.get(0).getSequenceNumber();
            }
            // Empty batch: advance to the next iterator and try again.
            iterator = lastResult.getNextShardIterator();
        }

        if (iterator != null) {
            // if we reach here, it usually means either the shard has no more records, or records have not been
            // added to this shard
            final Object millisBehindLatest = lastResult == null ? "UNKNOWN" : lastResult.getMillisBehindLatest();
            log.warn("timed out while trying to fetch position for shard[%s], millisBehindLatest is [%s], likely no more records in shard", partition.getPartitionId(), millisBehindLatest);
            return null;
        }

        log.info("Partition[%s] returned a null shard iterator, is the shard closed?", partition.getPartitionId());
        return KinesisSequenceNumber.END_OF_SHARD_MARKER;
    });
}
Also used : Shard(com.amazonaws.services.kinesis.model.Shard) StreamPartition(org.apache.druid.indexing.seekablestream.common.StreamPartition) ScheduledFuture(java.util.concurrent.ScheduledFuture) AWSClientUtil(org.apache.druid.common.aws.AWSClientUtil) OrderedPartitionableRecord(org.apache.druid.indexing.seekablestream.common.OrderedPartitionableRecord) AwsClientBuilder(com.amazonaws.client.builder.AwsClientBuilder) ByteBuffer(java.nio.ByteBuffer) Map(java.util.Map) Method(java.lang.reflect.Method) ListShardsResult(com.amazonaws.services.kinesis.model.ListShardsResult) GetRecordsResult(com.amazonaws.services.kinesis.model.GetRecordsResult) ImmutableSet(com.google.common.collect.ImmutableSet) Execs(org.apache.druid.java.util.common.concurrent.Execs) AWSSecurityTokenServiceClientBuilder(com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder) AmazonKinesis(com.amazonaws.services.kinesis.AmazonKinesis) ShardIteratorType(com.amazonaws.services.kinesis.model.ShardIteratorType) MethodHandles(java.lang.invoke.MethodHandles) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) BlockingQueue(java.util.concurrent.BlockingQueue) UUID(java.util.UUID) STSAssumeRoleSessionCredentialsProvider(com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) StreamException(org.apache.druid.indexing.seekablestream.common.StreamException) Queues(com.google.common.collect.Queues) List(java.util.List) ByteEntity(org.apache.druid.data.input.impl.ByteEntity) KinesisSupervisor(org.apache.druid.indexing.kinesis.supervisor.KinesisSupervisor) AmazonClientException(com.amazonaws.AmazonClientException) MethodHandle(java.lang.invoke.MethodHandle) 
Record(com.amazonaws.services.kinesis.model.Record) RecordSupplier(org.apache.druid.indexing.seekablestream.common.RecordSupplier) ListShardsRequest(com.amazonaws.services.kinesis.model.ListShardsRequest) ProvisionedThroughputExceededException(com.amazonaws.services.kinesis.model.ProvisionedThroughputExceededException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Callable(java.util.concurrent.Callable) AwsHostNameUtils(com.amazonaws.util.AwsHostNameUtils) ArrayList(java.util.ArrayList) ConcurrentMap(java.util.concurrent.ConcurrentMap) AWSCredentialsUtils(org.apache.druid.common.aws.AWSCredentialsUtils) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) AWSCredentialsProvider(com.amazonaws.auth.AWSCredentialsProvider) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ResourceNotFoundException(com.amazonaws.services.kinesis.model.ResourceNotFoundException) RetryUtils(org.apache.druid.java.util.common.RetryUtils) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) Iterator(java.util.Iterator) InvalidArgumentException(com.amazonaws.services.kinesis.model.InvalidArgumentException) Maps(com.google.common.collect.Maps) AWSCredentialsConfig(org.apache.druid.common.aws.AWSCredentialsConfig) TimeUnit(java.util.concurrent.TimeUnit) ClientConfiguration(com.amazonaws.ClientConfiguration) AmazonKinesisClientBuilder(com.amazonaws.services.kinesis.AmazonKinesisClientBuilder) GetRecordsRequest(com.amazonaws.services.kinesis.model.GetRecordsRequest) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) ExpiredIteratorException(com.amazonaws.services.kinesis.model.ExpiredIteratorException) Collections(java.util.Collections) GetRecordsResult(com.amazonaws.services.kinesis.model.GetRecordsResult) AmazonClientException(com.amazonaws.AmazonClientException) 
OrderedPartitionableRecord(org.apache.druid.indexing.seekablestream.common.OrderedPartitionableRecord) Record(com.amazonaws.services.kinesis.model.Record) ProvisionedThroughputExceededException(com.amazonaws.services.kinesis.model.ProvisionedThroughputExceededException) GetRecordsRequest(com.amazonaws.services.kinesis.model.GetRecordsRequest) Nullable(javax.annotation.Nullable)

Example 10 with StreamPartition

use of org.apache.druid.indexing.seekablestream.common.StreamPartition in project druid by druid-io.

the class IncrementalPublishingKafkaIndexTaskRunner method possiblyResetOffsetsOrWait.

/**
 * Reacts to partitions reported as out of range by either requesting an offset reset or pausing before the
 * next poll attempt.
 *
 * <p>When the tuning config enables automatic offset reset, every entry of {@code outOfRangePartitions} is
 * checked: the earliest sequence number for the partition is fetched from {@code recordSupplier}, the supplier
 * is seeked back to the offset that was being fetched, and the partition is selected for reset if its earliest
 * available offset is greater than that offset. If at least one partition was selected, a reset request is
 * sent for the selected partitions. Otherwise a warning is logged and the method waits on
 * {@code isAwaitingRetry} for up to {@code task.getPollRetryMs()} milliseconds, stopping early once
 * {@code pauseRequested} or {@code stopRequested} is set.
 *
 * @param outOfRangePartitions Kafka partitions mapped to the next offset that was being fetched
 * @param recordSupplier       supplier used to look up earliest offsets and to re-seek
 * @param taskToolbox          toolbox handed through to {@code sendResetRequestAndWait}
 * @throws ISE                  if the supplier returns a null earliest sequence number for a partition
 * @throws InterruptedException declared by this method; see the lock acquisition and timed wait
 * @throws IOException          declared by this method
 */
private void possiblyResetOffsetsOrWait(Map<TopicPartition, Long> outOfRangePartitions, RecordSupplier<Integer, Long, KafkaRecordEntity> recordSupplier, TaskToolbox taskToolbox) throws InterruptedException, IOException {
    final Map<TopicPartition, Long> partitionsToReset = new HashMap<>();
    if (task.getTuningConfig().isResetOffsetAutomatically()) {
        for (Map.Entry<TopicPartition, Long> entry : outOfRangePartitions.entrySet()) {
            final TopicPartition kafkaPartition = entry.getKey();
            final long requestedOffset = entry.getValue();
            final StreamPartition<Integer> target = StreamPartition.of(kafkaPartition.topic(), kafkaPartition.partition());
            // Ask the supplier for the least available offset of this partition.
            final Long earliestOffset = recordSupplier.getEarliestSequenceNumber(target);
            if (earliestOffset == null) {
                throw new ISE("got null sequence number for partition[%s] when fetching from kafka!", kafkaPartition.partition());
            }
            // Put the position back on the offset we were fetching (presumably the lookup above moved it).
            recordSupplier.seek(target, requestedOffset);
            // A reset is only warranted when the earliest available offset is already past the offset
            // we were trying to fetch.
            if (earliestOffset > requestedOffset) {
                partitionsToReset.put(kafkaPartition, requestedOffset);
            }
        }
    }
    if (!partitionsToReset.isEmpty()) {
        // Re-key the selected partitions by StreamPartition before sending the reset request.
        sendResetRequestAndWait(CollectionUtils.mapKeys(partitionsToReset, tp -> StreamPartition.of(tp.topic(), tp.partition())), taskToolbox);
        return;
    }
    // Nothing to reset: back off for the poll-retry interval unless a pause or stop is requested.
    log.warn("Retrying in %dms", task.getPollRetryMs());
    pollRetryLock.lockInterruptibly();
    try {
        long remainingNanos = TimeUnit.MILLISECONDS.toNanos(task.getPollRetryMs());
        // Keep waiting on the value handed back by awaitNanos until it runs out or a pause/stop is flagged.
        while (!(remainingNanos <= 0L || pauseRequested || stopRequested.get())) {
            remainingNanos = isAwaitingRetry.awaitNanos(remainingNanos);
        }
    } finally {
        pollRetryLock.unlock();
    }
}
Also used : TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) SeekableStreamSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamSequenceNumbers) StreamPartition(org.apache.druid.indexing.seekablestream.common.StreamPartition) RecordSupplier(org.apache.druid.indexing.seekablestream.common.RecordSupplier) OrderedPartitionableRecord(org.apache.druid.indexing.seekablestream.common.OrderedPartitionableRecord) AuthorizerMapper(org.apache.druid.server.security.AuthorizerMapper) CollectionUtils(org.apache.druid.utils.CollectionUtils) HashMap(java.util.HashMap) ByteBuffer(java.nio.ByteBuffer) SequenceMetadata(org.apache.druid.indexing.seekablestream.SequenceMetadata) Map(java.util.Map) TypeReference(com.fasterxml.jackson.core.type.TypeReference) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) OffsetOutOfRangeException(org.apache.kafka.clients.consumer.OffsetOutOfRangeException) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) TopicPartition(org.apache.kafka.common.TopicPartition) SeekableStreamDataSourceMetadata(org.apache.druid.indexing.seekablestream.SeekableStreamDataSourceMetadata) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) IOException(java.io.IOException) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) NotNull(javax.validation.constraints.NotNull) LockGranularity(org.apache.druid.indexing.common.LockGranularity) OrderedSequenceNumber(org.apache.druid.indexing.seekablestream.common.OrderedSequenceNumber) TimeUnit(java.util.concurrent.TimeUnit) KafkaRecordEntity(org.apache.druid.data.input.kafka.KafkaRecordEntity) List(java.util.List) TreeMap(java.util.TreeMap) Collections(java.util.Collections) SeekableStreamIndexTaskRunner(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskRunner) 
HashMap(java.util.HashMap) TopicPartition(org.apache.kafka.common.TopicPartition) ISE(org.apache.druid.java.util.common.ISE) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap)

Aggregations

StreamPartition (org.apache.druid.indexing.seekablestream.common.StreamPartition)12 Map (java.util.Map)8 TreeMap (java.util.TreeMap)7 HashMap (java.util.HashMap)6 Set (java.util.Set)6 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)6 SeekableStreamEndSequenceNumbers (org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers)6 ImmutableMap (com.google.common.collect.ImmutableMap)5 RealtimeIndexTask (org.apache.druid.indexing.common.task.RealtimeIndexTask)5 Task (org.apache.druid.indexing.common.task.Task)5 SeekableStreamStartSequenceNumbers (org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers)5 OrderedPartitionableRecord (org.apache.druid.indexing.seekablestream.common.OrderedPartitionableRecord)5 ImmutableSet (com.google.common.collect.ImmutableSet)4 Collections (java.util.Collections)4 List (java.util.List)4 Nullable (javax.annotation.Nullable)4 KinesisDataSourceMetadata (org.apache.druid.indexing.kinesis.KinesisDataSourceMetadata)4 KinesisIndexTask (org.apache.druid.indexing.kinesis.KinesisIndexTask)4 KinesisIndexTaskIOConfig (org.apache.druid.indexing.kinesis.KinesisIndexTaskIOConfig)4 TypeReference (com.fasterxml.jackson.core.type.TypeReference)3