use of org.apache.druid.indexing.seekablestream.common.StreamPartition in project druid by druid-io.
the class KafkaSupervisor method updatePartitionLagFromStream.
@Override
protected void updatePartitionLagFromStream()
{
  getRecordSupplierLock().lock();
  try {
    Set<Integer> partitionIds;
    try {
      partitionIds = recordSupplier.getPartitionIds(getIoConfig().getStream());
    }
    catch (Exception e) {
      log.warn("Could not fetch partitions for topic/stream [%s]", getIoConfig().getStream());
      throw new StreamException(e);
    }

    Set<StreamPartition<Integer>> partitions = partitionIds
        .stream()
        .map(e -> new StreamPartition<>(getIoConfig().getStream(), e))
        .collect(Collectors.toSet());

    recordSupplier.seekToLatest(partitions);

    // This method isn't actually computing the lag, just fetching the latest offsets from the stream. This is
    // because we currently only have record lag for Kafka, which can be lazily computed by subtracting the
    // highest task offsets from the latest offsets from the stream when it is needed.
    latestSequenceFromStream = partitions
        .stream()
        .collect(Collectors.toMap(StreamPartition::getPartitionId, recordSupplier::getPosition));
  }
  catch (InterruptedException e) {
    throw new StreamException(e);
  }
  finally {
    getRecordSupplierLock().unlock();
  }
}
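The comment above notes that this method only fetches the latest stream offsets; Kafka record lag is derived lazily elsewhere by subtracting the highest task offsets. A minimal sketch of that subtraction, assuming both maps are available (computeRecordLag and its parameter names are illustrative, not Druid's actual API):

import java.util.HashMap;
import java.util.Map;

public class KafkaLagSketch
{
  // Per-partition record lag = latest offset seen on the stream minus the highest
  // offset any task has reported. A partition with no reported task offset counts
  // its entire backlog as lag.
  public static Map<Integer, Long> computeRecordLag(
      Map<Integer, Long> latestSequenceFromStream,
      Map<Integer, Long> highestCurrentOffsets
  )
  {
    Map<Integer, Long> lag = new HashMap<>();
    for (Map.Entry<Integer, Long> entry : latestSequenceFromStream.entrySet()) {
      Long taskOffset = highestCurrentOffsets.get(entry.getKey());
      long consumed = taskOffset != null ? taskOffset : 0L;
      lag.put(entry.getKey(), Math.max(0L, entry.getValue() - consumed));
    }
    return lag;
  }
}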
use of org.apache.druid.indexing.seekablestream.common.StreamPartition in project druid by druid-io.
the class KinesisSupervisorTest method testShardMergePhaseThree.
/**
 * Test task creation after a shard merge with two closed shards and one open shard, with the closed shards
 * expiring and no longer being returned from the record supplier.
 *
 * @param phaseTwoTasks List of tasks from the second phase, where closed but not yet expired shards were present.
 */
private void testShardMergePhaseThree(List<Task> phaseTwoTasks) throws Exception
{
  EasyMock.reset(indexerMetadataStorageCoordinator);
  EasyMock.reset(taskStorage);
  EasyMock.reset(taskQueue);
  EasyMock.reset(taskClient);
  EasyMock.reset(taskMaster);
  EasyMock.reset(taskRunner);
  EasyMock.reset(supervisorRecordSupplier);

  // The second set of tasks ran; shards 0 and 1 have expired, but shard 2 has data.
  EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(
      new KinesisDataSourceMetadata(
          new SeekableStreamEndSequenceNumbers<String, String>(
              STREAM,
              ImmutableMap.of(
                  SHARD_ID0, KinesisSequenceNumber.END_OF_SHARD_MARKER,
                  SHARD_ID1, KinesisSequenceNumber.END_OF_SHARD_MARKER,
                  SHARD_ID2, "100"
              )
          )
      )
  ).anyTimes();
  EasyMock.expect(indexerMetadataStorageCoordinator.resetDataSourceMetadata(
      DATASOURCE,
      new KinesisDataSourceMetadata(
          new SeekableStreamEndSequenceNumbers<String, String>(
              STREAM,
              ImmutableMap.of(
                  SHARD_ID0, KinesisSequenceNumber.EXPIRED_MARKER,
                  SHARD_ID1, KinesisSequenceNumber.EXPIRED_MARKER,
                  SHARD_ID2, "100"
              )
          )
      )
  )).andReturn(true).anyTimes();

  EasyMock.expect(supervisorRecordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID2)).anyTimes();
  EasyMock.expect(supervisorRecordSupplier.getAssignment()).andReturn(ImmutableSet.of(SHARD2_PARTITION)).anyTimes();
  supervisorRecordSupplier.seekToLatest(EasyMock.anyObject());
  EasyMock.expectLastCall().anyTimes();
  EasyMock.expect(supervisorRecordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
  EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID2)))
          .andReturn("200")
          .anyTimes();
  supervisorRecordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
  EasyMock.expectLastCall().anyTimes();

  Capture<Task> postSplitCaptured = Capture.newInstance(CaptureType.ALL);

  EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
  EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
  EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
  EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
  EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString()))
          .andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.NOT_STARTED))
          .anyTimes();

  Task successfulTask0 = phaseTwoTasks.get(0);
  EasyMock.expect(taskStorage.getStatus(successfulTask0.getId()))
          .andReturn(Optional.of(TaskStatus.success(successfulTask0.getId())));
  EasyMock.expect(taskStorage.getTask(successfulTask0.getId())).andReturn(Optional.of(successfulTask0)).anyTimes();

  EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString()))
          .andReturn(Futures.immediateFuture(DateTimes.nowUtc()))
          .anyTimes();
  EasyMock.expect(taskQueue.add(EasyMock.capture(postSplitCaptured))).andReturn(true).times(1);

  replayAll();
  supervisor.runInternal();
  verifyAll();

  EasyMock.reset(taskStorage);
  EasyMock.reset(taskClient);

  EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString()))
          .andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.NOT_STARTED))
          .anyTimes();
  EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString()))
          .andReturn(Futures.immediateFuture(DateTimes.nowUtc()))
          .anyTimes();

  TreeMap<Integer, Map<String, String>> checkpointsGroup0 = new TreeMap<>();
  checkpointsGroup0.put(0, ImmutableMap.of(SHARD_ID2, "100"));

  // There will be one task; only task group 0 has a shard.
  EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-0"), EasyMock.anyBoolean()))
          .andReturn(Futures.immediateFuture(checkpointsGroup0))
          .times(1);

  List<Task> postSplitTasks = postSplitCaptured.getValues();
  EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(postSplitTasks).anyTimes();
  for (Task task : postSplitTasks) {
    EasyMock.expect(taskStorage.getStatus(task.getId()))
            .andReturn(Optional.of(TaskStatus.running(task.getId())))
            .anyTimes();
    EasyMock.expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
  }

  EasyMock.replay(taskStorage);
  EasyMock.replay(taskClient);

  supervisor.runInternal();
  verifyAll();

  // Check that shardId-000000000000 and shardId-000000000001, which have hit end-of-shard and expired, are not
  // included in the sequences sent to the task for group 0.
  SeekableStreamStartSequenceNumbers<String, String> group0ExpectedStartSequenceNumbers =
      new SeekableStreamStartSequenceNumbers<>(
          STREAM,
          ImmutableMap.of(SHARD_ID2, "100"),
          ImmutableSet.of(SHARD_ID2)
      );
  SeekableStreamEndSequenceNumbers<String, String> group0ExpectedEndSequenceNumbers =
      new SeekableStreamEndSequenceNumbers<>(
          STREAM,
          ImmutableMap.of(SHARD_ID2, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER)
      );

  Assert.assertEquals(1, postSplitTasks.size());
  KinesisIndexTaskIOConfig group0Config = ((KinesisIndexTask) postSplitTasks.get(0)).getIOConfig();
  Assert.assertEquals((Integer) 0, group0Config.getTaskGroupId());
  Assert.assertEquals(group0ExpectedStartSequenceNumbers, group0Config.getStartSequenceNumbers());
  Assert.assertEquals(group0ExpectedEndSequenceNumbers, group0Config.getEndSequenceNumbers());

  Map<Integer, Set<String>> expectedPartitionGroups = ImmutableMap.of(
      0, ImmutableSet.of(SHARD_ID2),
      1, ImmutableSet.of()
  );
  ConcurrentHashMap<String, String> expectedPartitionOffsets = new ConcurrentHashMap<>(
      ImmutableMap.of(SHARD_ID2, "-1", SHARD_ID1, "-1", SHARD_ID0, "-1")
  );
  Assert.assertEquals(expectedPartitionGroups, supervisor.getPartitionGroups());
  Assert.assertEquals(expectedPartitionOffsets, supervisor.getPartitionOffsets());
}
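The retrieve/reset pair mocked at the top of this test encodes the expiration handling under test: shards recorded at END_OF_SHARD_MARKER that no longer appear in the record supplier's shard listing are rewritten to EXPIRED_MARKER, while live shards keep their offsets. A minimal sketch of that transformation (markExpiredShards is a hypothetical helper, not the supervisor's actual method):

import java.util.HashMap;
import java.util.Map;
import java.util.Set;

public class ExpiredShardSketch
{
  // Stand-in for KinesisSequenceNumber.EXPIRED_MARKER.
  static final String EXPIRED_MARKER = "EXPIRED";

  // Shards missing from the live listing are marked expired; others keep their stored offset.
  static Map<String, String> markExpiredShards(Map<String, String> storedOffsets, Set<String> liveShardIds)
  {
    Map<String, String> updated = new HashMap<>();
    for (Map.Entry<String, String> entry : storedOffsets.entrySet()) {
      updated.put(entry.getKey(), liveShardIds.contains(entry.getKey()) ? entry.getValue() : EXPIRED_MARKER);
    }
    return updated;
  }
}

With the test's data (only SHARD_ID2 live), shards 0 and 1 map to EXPIRED_MARKER and shard 2 keeps "100", matching the metadata passed to resetDataSourceMetadata above.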
use of org.apache.druid.indexing.seekablestream.common.StreamPartition in project druid by druid-io.
the class KinesisRecordSupplierTest method testPollWithKinesisNonRetryableFailure.
@Test
public void testPollWithKinesisNonRetryableFailure() throws InterruptedException
{
  recordsPerFetch = 100;
  EasyMock.expect(kinesis.getShardIterator(
      EasyMock.anyObject(),
      EasyMock.eq(SHARD_ID0),
      EasyMock.anyString(),
      EasyMock.anyString()
  )).andReturn(getShardIteratorResult0).anyTimes();

  AmazonServiceException getException = new AmazonServiceException("BadRequest");
  getException.setErrorCode("BadRequest");
  getException.setStatusCode(400);
  getException.setServiceName("AmazonKinesis");

  EasyMock.expect(getShardIteratorResult0.getShardIterator()).andReturn(SHARD0_ITERATOR).anyTimes();
  EasyMock.expect(kinesis.getRecords(generateGetRecordsReq(SHARD0_ITERATOR, recordsPerFetch)))
          .andThrow(getException)
          .once();

  replayAll();

  Set<StreamPartition<String>> partitions = ImmutableSet.of(StreamPartition.of(STREAM, SHARD_ID0));

  recordSupplier = new KinesisRecordSupplier(kinesis, recordsPerFetch, 0, 1, false, 100, 5000, 5000, 60000, 100, true);
  recordSupplier.assign(partitions);
  recordSupplier.seekToEarliest(partitions);
  recordSupplier.start();

  // Wait (bounded) for the failed background fetch to shut itself down.
  int count = 0;
  while (recordSupplier.isAnyFetchActive() && count++ < 10) {
    Thread.sleep(100);
  }
  Assert.assertFalse(recordSupplier.isAnyFetchActive());

  List<OrderedPartitionableRecord<String, String, ByteEntity>> polledRecords =
      cleanRecords(recordSupplier.poll(POLL_TIMEOUT_MILLIS));

  verifyAll();

  Assert.assertEquals(partitions, recordSupplier.getAssignment());
  Assert.assertEquals(0, polledRecords.size());
}
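This test relies on the supplier treating an HTTP 4xx AmazonServiceException as non-retryable: the background fetch shuts down and poll() yields nothing. A minimal sketch of that style of classification, assuming a simple status-code cutoff (isNonRetryable is a hypothetical predicate, not the supplier's actual method; the real classification may special-case retryable 4xx errors such as throughput throttling):

import com.amazonaws.AmazonServiceException;

public class KinesisErrorSketch
{
  // Client errors (4xx, like the 400 "BadRequest" built above) abort the fetch;
  // server errors (5xx) are treated as transient and retried.
  static boolean isNonRetryable(AmazonServiceException e)
  {
    int status = e.getStatusCode();
    return status >= 400 && status < 500;
  }
}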
use of org.apache.druid.indexing.seekablestream.common.StreamPartition in project druid by druid-io.
the class KinesisRecordSupplierTest method testSupplierSetup.
@Test
public void testSupplierSetup()
{
  final Capture<ListShardsRequest> capturedRequest0 = Capture.newInstance();
  final Capture<ListShardsRequest> capturedRequest1 = Capture.newInstance();

  EasyMock.expect(kinesis.listShards(EasyMock.capture(capturedRequest0))).andReturn(listShardsResult0).once();
  EasyMock.expect(listShardsResult0.getShards()).andReturn(ImmutableList.of(shard0)).once();
  String nextToken = "nextToken";
  EasyMock.expect(listShardsResult0.getNextToken()).andReturn(nextToken).once();
  EasyMock.expect(shard0.getShardId()).andReturn(SHARD_ID0).once();
  EasyMock.expect(kinesis.listShards(EasyMock.capture(capturedRequest1))).andReturn(listShardsResult1).once();
  EasyMock.expect(listShardsResult1.getShards()).andReturn(ImmutableList.of(shard1)).once();
  EasyMock.expect(listShardsResult1.getNextToken()).andReturn(null).once();
  EasyMock.expect(shard1.getShardId()).andReturn(SHARD_ID1).once();

  replayAll();

  Set<StreamPartition<String>> partitions = ImmutableSet.of(
      StreamPartition.of(STREAM, SHARD_ID0),
      StreamPartition.of(STREAM, SHARD_ID1)
  );

  recordSupplier = new KinesisRecordSupplier(kinesis, recordsPerFetch, 0, 2, false, 100, 5000, 5000, 60000, 5, true);

  Assert.assertTrue(recordSupplier.getAssignment().isEmpty());

  recordSupplier.assign(partitions);

  Assert.assertEquals(partitions, recordSupplier.getAssignment());
  Assert.assertEquals(ImmutableSet.of(SHARD_ID1, SHARD_ID0), recordSupplier.getPartitionIds(STREAM));

  // Calling poll would start the background fetch if seek had been called; since it wasn't, the fetch is
  // skipped and the poll result is empty.
  Assert.assertEquals(Collections.emptyList(), recordSupplier.poll(100));

  verifyAll();

  final ListShardsRequest expectedRequest0 = new ListShardsRequest();
  expectedRequest0.setStreamName(STREAM);
  Assert.assertEquals(expectedRequest0, capturedRequest0.getValue());
  final ListShardsRequest expectedRequest1 = new ListShardsRequest();
  expectedRequest1.setNextToken(nextToken);
  Assert.assertEquals(expectedRequest1, capturedRequest1.getValue());
}
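The two captured requests pin down the pagination contract: the first ListShardsRequest identifies the stream by name, and each follow-up carries only the nextToken from the previous result (the token already encodes the stream). A minimal sketch of that loop against the AWS SDK v1 client, collecting shard IDs the way getPartitionIds does:

import com.amazonaws.services.kinesis.AmazonKinesis;
import com.amazonaws.services.kinesis.model.ListShardsRequest;
import com.amazonaws.services.kinesis.model.ListShardsResult;
import com.amazonaws.services.kinesis.model.Shard;
import java.util.HashSet;
import java.util.Set;

public class ListShardsSketch
{
  static Set<String> listShardIds(AmazonKinesis kinesis, String stream)
  {
    Set<String> shardIds = new HashSet<>();
    String nextToken = null;
    do {
      ListShardsRequest request = new ListShardsRequest();
      if (nextToken == null) {
        request.setStreamName(stream); // first page: identify the stream by name
      } else {
        request.setNextToken(nextToken); // later pages: token only
      }
      ListShardsResult result = kinesis.listShards(request);
      for (Shard shard : result.getShards()) {
        shardIds.add(shard.getShardId());
      }
      nextToken = result.getNextToken();
    } while (nextToken != null);
    return shardIds;
  }
}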
use of org.apache.druid.indexing.seekablestream.common.StreamPartition in project druid by druid-io.
the class KinesisRecordSupplierTest method testPollWithKinesisInternalFailure.
@Test
public void testPollWithKinesisInternalFailure() throws InterruptedException
{
  recordsPerFetch = 100;
  EasyMock.expect(kinesis.getShardIterator(
      EasyMock.anyObject(),
      EasyMock.eq(SHARD_ID0),
      EasyMock.anyString(),
      EasyMock.anyString()
  )).andReturn(getShardIteratorResult0).anyTimes();
  EasyMock.expect(kinesis.getShardIterator(
      EasyMock.anyObject(),
      EasyMock.eq(SHARD_ID1),
      EasyMock.anyString(),
      EasyMock.anyString()
  )).andReturn(getShardIteratorResult1).anyTimes();
  EasyMock.expect(getShardIteratorResult0.getShardIterator()).andReturn(SHARD0_ITERATOR).anyTimes();
  EasyMock.expect(getShardIteratorResult1.getShardIterator()).andReturn(SHARD1_ITERATOR).anyTimes();
  EasyMock.expect(kinesis.getRecords(generateGetRecordsReq(SHARD0_ITERATOR, recordsPerFetch)))
          .andReturn(getRecordsResult0)
          .anyTimes();
  EasyMock.expect(kinesis.getRecords(generateGetRecordsReq(SHARD1_ITERATOR, recordsPerFetch)))
          .andReturn(getRecordsResult1)
          .anyTimes();

  AmazonServiceException getException = new AmazonServiceException("InternalFailure");
  getException.setErrorCode("InternalFailure");
  getException.setStatusCode(500);
  getException.setServiceName("AmazonKinesis");
  EasyMock.expect(getRecordsResult0.getRecords()).andThrow(getException).once();
  EasyMock.expect(getRecordsResult0.getRecords()).andReturn(SHARD0_RECORDS).once();

  AmazonServiceException getException2 = new AmazonServiceException("InternalFailure");
  getException2.setErrorCode("InternalFailure");
  getException2.setStatusCode(503);
  getException2.setServiceName("AmazonKinesis");
  EasyMock.expect(getRecordsResult1.getRecords()).andThrow(getException2).once();
  EasyMock.expect(getRecordsResult1.getRecords()).andReturn(SHARD1_RECORDS).once();

  EasyMock.expect(getRecordsResult0.getNextShardIterator()).andReturn(null).anyTimes();
  EasyMock.expect(getRecordsResult1.getNextShardIterator()).andReturn(null).anyTimes();
  EasyMock.expect(getRecordsResult0.getMillisBehindLatest()).andReturn(SHARD0_LAG_MILLIS).once();
  EasyMock.expect(getRecordsResult0.getMillisBehindLatest()).andReturn(SHARD0_LAG_MILLIS).once();
  EasyMock.expect(getRecordsResult1.getMillisBehindLatest()).andReturn(SHARD1_LAG_MILLIS).once();
  EasyMock.expect(getRecordsResult1.getMillisBehindLatest()).andReturn(SHARD1_LAG_MILLIS).once();

  replayAll();

  Set<StreamPartition<String>> partitions = ImmutableSet.of(
      StreamPartition.of(STREAM, SHARD_ID0),
      StreamPartition.of(STREAM, SHARD_ID1)
  );

  recordSupplier = new KinesisRecordSupplier(kinesis, recordsPerFetch, 0, 2, false, 100, 5000, 5000, 60000, 100, true);
  recordSupplier.assign(partitions);
  recordSupplier.seekToEarliest(partitions);
  recordSupplier.start();

  while (recordSupplier.bufferSize() < 14) {
    Thread.sleep(100);
  }

  List<OrderedPartitionableRecord<String, String, ByteEntity>> polledRecords =
      cleanRecords(recordSupplier.poll(POLL_TIMEOUT_MILLIS));

  verifyAll();

  Assert.assertEquals(partitions, recordSupplier.getAssignment());
  Assert.assertTrue(polledRecords.containsAll(ALL_RECORDS));
  Assert.assertEquals(SHARDS_LAG_MILLIS, recordSupplier.getPartitionResourcesTimeLag());
}
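In contrast to the 400 case above, each 5xx here is thrown exactly once and then records flow normally, so the background fetch must retry server-side failures rather than shutting down. A minimal sketch of that shape (fetchWithRetry is a hypothetical helper; the real supplier reschedules fetches with a delay rather than looping inline):

import com.amazonaws.AmazonServiceException;
import java.util.concurrent.Callable;

public class RetryingFetchSketch
{
  // Retry server errors (5xx) up to maxRetries times; rethrow client errors (4xx)
  // immediately, mirroring the non-retryable BadRequest test earlier.
  static <T> T fetchWithRetry(Callable<T> fetchOnce, int maxRetries) throws Exception
  {
    AmazonServiceException last = null;
    for (int attempt = 0; attempt <= maxRetries; attempt++) {
      try {
        return fetchOnce.call();
      }
      catch (AmazonServiceException e) {
        if (e.getStatusCode() < 500) {
          throw e;
        }
        last = e;
      }
    }
    throw last;
  }
}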