use of org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers in project druid by druid-io.
the class KinesisIndexTaskTest method testRunWithTransformSpec.
@Test(timeout = 120_000L)
public void testRunWithTransformSpec() throws Exception {
recordSupplier.assign(EasyMock.anyObject());
EasyMock.expectLastCall().anyTimes();
EasyMock.expect(recordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
recordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
EasyMock.expectLastCall().anyTimes();
EasyMock.expect(recordSupplier.poll(EasyMock.anyLong())).andReturn(generateRecords(0, 13)).once();
recordSupplier.close();
EasyMock.expectLastCall().once();
replayAll();
final KinesisIndexTask task = createTask(null, NEW_DATA_SCHEMA.withTransformSpec(new TransformSpec(new SelectorDimFilter("dim1", "b", null), ImmutableList.of(new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil())))), new KinesisIndexTaskIOConfig(0, "sequence0", new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "0"), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "4")), true, null, null, INPUT_FORMAT, "awsEndpoint", null, null, null, null, false));
final ListenableFuture<TaskStatus> future = runTask(task);
// Wait for the task to start reading
while (task.getRunner().getStatus() != SeekableStreamIndexTaskRunner.Status.READING) {
Thread.sleep(10);
}
// Wait for task to exit
Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode());
verifyAll();
// Check metrics
Assert.assertEquals(1, task.getRunner().getRowIngestionMeters().getProcessed());
Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getUnparseable());
Assert.assertEquals(4, task.getRunner().getRowIngestionMeters().getThrownAway());
// Check published metadata
assertEqualsExceptVersion(ImmutableList.of(sdd("2009/P1D", 0)), publishedDescriptors());
Assert.assertEquals(new KinesisDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "4"))), newDataSchemaMetadata());
// Check segments in deep storage
final List<SegmentDescriptor> publishedDescriptors = publishedDescriptors();
Assert.assertEquals(ImmutableList.of("b"), readSegmentColumn("dim1", publishedDescriptors.get(0)));
Assert.assertEquals(ImmutableList.of("bb"), readSegmentColumn("dim1t", publishedDescriptors.get(0)));
}
use of org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers in project druid by druid-io.
the class KinesisIndexTaskTest method testMultipleParseExceptionsSuccess.
@Test(timeout = 120_000L)
public void testMultipleParseExceptionsSuccess() throws Exception {
reportParseExceptions = false;
maxParseExceptions = 7;
maxSavedParseExceptions = 7;
recordSupplier.assign(EasyMock.anyObject());
EasyMock.expectLastCall().anyTimes();
EasyMock.expect(recordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
recordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
EasyMock.expectLastCall().anyTimes();
EasyMock.expect(recordSupplier.poll(EasyMock.anyLong())).andReturn(generateRecords(2, 13)).once();
recordSupplier.close();
EasyMock.expectLastCall().once();
replayAll();
final KinesisIndexTask task = createTask(null, new KinesisIndexTaskIOConfig(0, "sequence0", new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "2"), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "12")), true, null, null, INPUT_FORMAT, "awsEndpoint", null, null, null, null, false));
final ListenableFuture<TaskStatus> future = runTask(task);
TaskStatus status = future.get();
// Wait for task to exit
Assert.assertEquals(TaskState.SUCCESS, status.getStatusCode());
verifyAll();
Assert.assertNull(status.getErrorMsg());
// Check metrics
Assert.assertEquals(4, task.getRunner().getRowIngestionMeters().getProcessed());
Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getProcessedWithError());
Assert.assertEquals(4, task.getRunner().getRowIngestionMeters().getUnparseable());
// Check published metadata
assertEqualsExceptVersion(ImmutableList.of(sdd("2010/P1D", 0), sdd("2011/P1D", 0), sdd("2013/P1D", 0), sdd("2049/P1D", 0)), publishedDescriptors());
Assert.assertEquals(new KinesisDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "12"))), newDataSchemaMetadata());
IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData();
Map<String, Object> expectedMetrics = ImmutableMap.of(RowIngestionMeters.BUILD_SEGMENTS, ImmutableMap.of(RowIngestionMeters.PROCESSED, 4, RowIngestionMeters.PROCESSED_WITH_ERROR, 3, RowIngestionMeters.UNPARSEABLE, 4, RowIngestionMeters.THROWN_AWAY, 0));
Assert.assertEquals(expectedMetrics, reportData.getRowStats());
List<LinkedHashMap> parseExceptionReports = (List<LinkedHashMap>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);
List<String> expectedMessages = Arrays.asList("Unable to parse value[notanumber] for field[met1]", "could not convert value [notanumber] to float", "could not convert value [notanumber] to long", "Timestamp[null] is unparseable! Event: {} (Record: 1)", "Unable to parse [] as the intermediateRow resulted in empty input row (Record: 1)", "Unable to parse row [unparseable] (Record: 1)", "Encountered row with timestamp[246140482-04-24T15:36:27.903Z] that cannot be represented as a long: [{timestamp=246140482-04-24T15:36:27.903Z, dim1=x, dim2=z, dimLong=10, dimFloat=20.0, met1=1.0}] (Record: 1)");
List<String> actualMessages = parseExceptionReports.stream().map((r) -> {
return ((List<String>) r.get("details")).get(0);
}).collect(Collectors.toList());
Assert.assertEquals(expectedMessages, actualMessages);
List<String> expectedInputs = Arrays.asList("{timestamp=2049, dim1=f, dim2=y, dimLong=10, dimFloat=20.0, met1=notanumber}", "{timestamp=2049, dim1=f, dim2=y, dimLong=10, dimFloat=notanumber, met1=1.0}", "{timestamp=2049, dim1=f, dim2=y, dimLong=notanumber, dimFloat=20.0, met1=1.0}", "{}", "", "unparseable", "{timestamp=246140482-04-24T15:36:27.903Z, dim1=x, dim2=z, dimLong=10, dimFloat=20.0, met1=1.0}");
List<String> actualInputs = parseExceptionReports.stream().map((r) -> {
return (String) r.get("input");
}).collect(Collectors.toList());
Assert.assertEquals(expectedInputs, actualInputs);
}
use of org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers in project druid by druid-io.
the class KinesisSupervisorTest method testShardSplitPhaseThree.
/**
* Test task creation after a shard split with a closed shard, with the closed shards expiring and no longer
* being returned from record supplier.
*
* @param phaseTwoTasks List of tasks from the second phase where closed but not expired shards were present.
*/
private void testShardSplitPhaseThree(List<Task> phaseTwoTasks) throws Exception {
EasyMock.reset(indexerMetadataStorageCoordinator);
EasyMock.reset(taskStorage);
EasyMock.reset(taskQueue);
EasyMock.reset(taskClient);
EasyMock.reset(taskMaster);
EasyMock.reset(taskRunner);
EasyMock.reset(supervisorRecordSupplier);
// second set of tasks ran, shard 0 has expired, but shard 1 and 2 have data
EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KinesisDataSourceMetadata(new SeekableStreamEndSequenceNumbers<String, String>(STREAM, ImmutableMap.of(SHARD_ID0, KinesisSequenceNumber.END_OF_SHARD_MARKER, SHARD_ID1, "100", SHARD_ID2, "100")))).anyTimes();
EasyMock.expect(indexerMetadataStorageCoordinator.resetDataSourceMetadata(DATASOURCE, new KinesisDataSourceMetadata(new SeekableStreamEndSequenceNumbers<String, String>(STREAM, ImmutableMap.of(SHARD_ID0, KinesisSequenceNumber.EXPIRED_MARKER, SHARD_ID1, "100", SHARD_ID2, "100"))))).andReturn(true).anyTimes();
EasyMock.expect(supervisorRecordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID1, SHARD_ID2)).anyTimes();
EasyMock.expect(supervisorRecordSupplier.getAssignment()).andReturn(ImmutableSet.of(SHARD1_PARTITION, SHARD2_PARTITION)).anyTimes();
supervisorRecordSupplier.seekToLatest(EasyMock.anyObject());
EasyMock.expectLastCall().anyTimes();
EasyMock.expect(supervisorRecordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID1))).andReturn("200").anyTimes();
EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID2))).andReturn("200").anyTimes();
supervisorRecordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
EasyMock.expectLastCall().anyTimes();
Capture<Task> postSplitCaptured = Capture.newInstance(CaptureType.ALL);
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.NOT_STARTED)).anyTimes();
Task successfulTask0 = phaseTwoTasks.get(0);
EasyMock.expect(taskStorage.getStatus(successfulTask0.getId())).andReturn(Optional.of(TaskStatus.success(successfulTask0.getId())));
EasyMock.expect(taskStorage.getTask(successfulTask0.getId())).andReturn(Optional.of(successfulTask0)).anyTimes();
Task successfulTask1 = phaseTwoTasks.get(1);
EasyMock.expect(taskStorage.getStatus(successfulTask1.getId())).andReturn(Optional.of(TaskStatus.success(successfulTask1.getId())));
EasyMock.expect(taskStorage.getTask(successfulTask1.getId())).andReturn(Optional.of(successfulTask1)).anyTimes();
EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
EasyMock.expect(taskQueue.add(EasyMock.capture(postSplitCaptured))).andReturn(true).times(2);
replayAll();
supervisor.runInternal();
verifyAll();
EasyMock.reset(taskStorage);
EasyMock.reset(taskClient);
EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.NOT_STARTED)).anyTimes();
EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
TreeMap<Integer, Map<String, String>> checkpointsGroup0 = new TreeMap<>();
checkpointsGroup0.put(0, ImmutableMap.of(SHARD_ID2, "100"));
TreeMap<Integer, Map<String, String>> checkpointsGroup1 = new TreeMap<>();
checkpointsGroup1.put(1, ImmutableMap.of(SHARD_ID1, "100"));
// there would be 2 tasks, 1 for each task group
EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-0"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpointsGroup0)).times(1);
EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-1"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpointsGroup1)).times(1);
List<Task> postSplitTasks = postSplitCaptured.getValues();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(postSplitTasks).anyTimes();
for (Task task : postSplitTasks) {
EasyMock.expect(taskStorage.getStatus(task.getId())).andReturn(Optional.of(TaskStatus.running(task.getId()))).anyTimes();
EasyMock.expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
}
EasyMock.replay(taskStorage);
EasyMock.replay(taskClient);
supervisor.runInternal();
verifyAll();
// Check that shardId-000000000000 which has hit EOS is not included in the sequences sent to the task for group 0
SeekableStreamStartSequenceNumbers<String, String> group0ExpectedStartSequenceNumbers = new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "100"), ImmutableSet.of(SHARD_ID1));
SeekableStreamEndSequenceNumbers<String, String> group0ExpectedEndSequenceNumbers = new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER));
SeekableStreamStartSequenceNumbers<String, String> group1ExpectedStartSequenceNumbers = new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID2, "100"), ImmutableSet.of(SHARD_ID2));
SeekableStreamEndSequenceNumbers<String, String> group1ExpectedEndSequenceNumbers = new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID2, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER));
Assert.assertEquals(2, postSplitTasks.size());
KinesisIndexTaskIOConfig group0Config = ((KinesisIndexTask) postSplitTasks.get(0)).getIOConfig();
KinesisIndexTaskIOConfig group1Config = ((KinesisIndexTask) postSplitTasks.get(1)).getIOConfig();
Assert.assertEquals((Integer) 0, group0Config.getTaskGroupId());
Assert.assertEquals((Integer) 1, group1Config.getTaskGroupId());
Assert.assertEquals(group0ExpectedStartSequenceNumbers, group0Config.getStartSequenceNumbers());
Assert.assertEquals(group0ExpectedEndSequenceNumbers, group0Config.getEndSequenceNumbers());
Assert.assertEquals(group1ExpectedStartSequenceNumbers, group1Config.getStartSequenceNumbers());
Assert.assertEquals(group1ExpectedEndSequenceNumbers, group1Config.getEndSequenceNumbers());
Map<Integer, Set<String>> expectedPartitionGroups = ImmutableMap.of(0, ImmutableSet.of(SHARD_ID1), 1, ImmutableSet.of(SHARD_ID2));
Assert.assertEquals(expectedPartitionGroups, supervisor.getPartitionGroups());
ConcurrentHashMap<String, String> expectedPartitionOffsets = new ConcurrentHashMap<>(ImmutableMap.of(SHARD_ID2, "-1", SHARD_ID1, "-1", SHARD_ID0, "-1"));
Assert.assertEquals(expectedPartitionOffsets, supervisor.getPartitionOffsets());
}
use of org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers in project druid by druid-io.
the class KinesisSupervisorTest method testShardSplitPhaseTwo.
/**
* Test task creation after a shard split with a closed shard
*
* @param phaseOneTasks List of tasks from the initial phase where only one shard was present
*/
private List<Task> testShardSplitPhaseTwo(List<Task> phaseOneTasks) throws Exception {
EasyMock.reset(indexerMetadataStorageCoordinator);
EasyMock.reset(taskStorage);
EasyMock.reset(taskQueue);
EasyMock.reset(taskClient);
EasyMock.reset(taskMaster);
EasyMock.reset(taskRunner);
EasyMock.reset(supervisorRecordSupplier);
// first task ran, its shard 0 has reached EOS
EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KinesisDataSourceMetadata(new SeekableStreamEndSequenceNumbers<String, String>(STREAM, ImmutableMap.of(SHARD_ID0, KinesisSequenceNumber.END_OF_SHARD_MARKER)))).anyTimes();
EasyMock.expect(supervisorRecordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID0, SHARD_ID1, SHARD_ID2)).anyTimes();
EasyMock.expect(supervisorRecordSupplier.getAssignment()).andReturn(ImmutableSet.of(SHARD0_PARTITION, SHARD1_PARTITION, SHARD2_PARTITION)).anyTimes();
supervisorRecordSupplier.seekToLatest(EasyMock.anyObject());
EasyMock.expectLastCall().anyTimes();
EasyMock.expect(supervisorRecordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID0))).andReturn(KinesisSequenceNumber.END_OF_SHARD_MARKER).anyTimes();
EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID1))).andReturn("100").anyTimes();
EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID2))).andReturn("100").anyTimes();
supervisorRecordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
EasyMock.expectLastCall().anyTimes();
Capture<Task> postSplitCaptured = Capture.newInstance(CaptureType.ALL);
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.NOT_STARTED)).anyTimes();
Task successfulTask = phaseOneTasks.get(0);
EasyMock.expect(taskStorage.getStatus(successfulTask.getId())).andReturn(Optional.of(TaskStatus.success(successfulTask.getId())));
EasyMock.expect(taskStorage.getTask(successfulTask.getId())).andReturn(Optional.of(successfulTask)).anyTimes();
EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
EasyMock.expect(taskQueue.add(EasyMock.capture(postSplitCaptured))).andReturn(true).times(2);
replayAll();
supervisor.runInternal();
verifyAll();
EasyMock.reset(taskStorage);
EasyMock.reset(taskClient);
EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.NOT_STARTED)).anyTimes();
EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
TreeMap<Integer, Map<String, String>> checkpointsGroup0 = new TreeMap<>();
checkpointsGroup0.put(0, ImmutableMap.of(SHARD_ID1, "0"));
TreeMap<Integer, Map<String, String>> checkpointsGroup1 = new TreeMap<>();
checkpointsGroup1.put(1, ImmutableMap.of(SHARD_ID2, "0"));
// there would be 2 tasks, 1 for each task group
EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-0"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpointsGroup0)).times(1);
EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-1"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpointsGroup1)).times(1);
List<Task> postSplitTasks = postSplitCaptured.getValues();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(postSplitTasks).anyTimes();
for (Task task : postSplitTasks) {
EasyMock.expect(taskStorage.getStatus(task.getId())).andReturn(Optional.of(TaskStatus.running(task.getId()))).anyTimes();
EasyMock.expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
}
EasyMock.replay(taskStorage);
EasyMock.replay(taskClient);
supervisor.runInternal();
verifyAll();
// Check that shardId-000000000000 which has hit EOS is not included in the sequences sent to the task for group 0
SeekableStreamStartSequenceNumbers<String, String> group0ExpectedStartSequenceNumbers = new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "0"), ImmutableSet.of());
SeekableStreamEndSequenceNumbers<String, String> group0ExpectedEndSequenceNumbers = new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER));
SeekableStreamStartSequenceNumbers<String, String> group1ExpectedStartSequenceNumbers = new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID2, "0"), ImmutableSet.of());
SeekableStreamEndSequenceNumbers<String, String> group1ExpectedEndSequenceNumbers = new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID2, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER));
Assert.assertEquals(2, postSplitTasks.size());
KinesisIndexTaskIOConfig group0Config = ((KinesisIndexTask) postSplitTasks.get(0)).getIOConfig();
KinesisIndexTaskIOConfig group1Config = ((KinesisIndexTask) postSplitTasks.get(1)).getIOConfig();
Assert.assertEquals((Integer) 0, group0Config.getTaskGroupId());
Assert.assertEquals((Integer) 1, group1Config.getTaskGroupId());
Assert.assertEquals(group0ExpectedStartSequenceNumbers, group0Config.getStartSequenceNumbers());
Assert.assertEquals(group0ExpectedEndSequenceNumbers, group0Config.getEndSequenceNumbers());
Assert.assertEquals(group1ExpectedStartSequenceNumbers, group1Config.getStartSequenceNumbers());
Assert.assertEquals(group1ExpectedEndSequenceNumbers, group1Config.getEndSequenceNumbers());
return postSplitTasks;
}
use of org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers in project druid by druid-io.
the class KinesisSupervisorTest method testShardMergePhaseTwo.
/**
* Test task creation after a shard split with a closed shard
*
* @param phaseOneTasks List of tasks from the initial phase where only one shard was present
*/
private List<Task> testShardMergePhaseTwo(List<Task> phaseOneTasks) throws Exception {
EasyMock.reset(indexerMetadataStorageCoordinator);
EasyMock.reset(taskStorage);
EasyMock.reset(taskQueue);
EasyMock.reset(taskClient);
EasyMock.reset(taskMaster);
EasyMock.reset(taskRunner);
EasyMock.reset(supervisorRecordSupplier);
// first tasks ran, both shard 0 and shard 1 have reached EOS, merged into shard 2
EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KinesisDataSourceMetadata(new SeekableStreamEndSequenceNumbers<String, String>(STREAM, ImmutableMap.of(SHARD_ID0, KinesisSequenceNumber.END_OF_SHARD_MARKER, SHARD_ID1, KinesisSequenceNumber.END_OF_SHARD_MARKER)))).anyTimes();
EasyMock.expect(supervisorRecordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID0, SHARD_ID1, SHARD_ID2)).anyTimes();
EasyMock.expect(supervisorRecordSupplier.getAssignment()).andReturn(ImmutableSet.of(SHARD0_PARTITION, SHARD1_PARTITION, SHARD2_PARTITION)).anyTimes();
supervisorRecordSupplier.seekToLatest(EasyMock.anyObject());
EasyMock.expectLastCall().anyTimes();
EasyMock.expect(supervisorRecordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID0))).andReturn(KinesisSequenceNumber.END_OF_SHARD_MARKER).anyTimes();
EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID1))).andReturn(KinesisSequenceNumber.END_OF_SHARD_MARKER).anyTimes();
EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(new StreamPartition<>(STREAM, SHARD_ID2))).andReturn("100").anyTimes();
supervisorRecordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
EasyMock.expectLastCall().anyTimes();
Capture<Task> postMergeCaptured = Capture.newInstance(CaptureType.ALL);
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.NOT_STARTED)).anyTimes();
Task successfulTask0 = phaseOneTasks.get(0);
Task successfulTask1 = phaseOneTasks.get(1);
EasyMock.expect(taskStorage.getStatus(successfulTask0.getId())).andReturn(Optional.of(TaskStatus.success(successfulTask0.getId())));
EasyMock.expect(taskStorage.getTask(successfulTask0.getId())).andReturn(Optional.of(successfulTask0)).anyTimes();
EasyMock.expect(taskStorage.getStatus(successfulTask1.getId())).andReturn(Optional.of(TaskStatus.success(successfulTask1.getId())));
EasyMock.expect(taskStorage.getTask(successfulTask1.getId())).andReturn(Optional.of(successfulTask1)).anyTimes();
EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
EasyMock.expect(taskQueue.add(EasyMock.capture(postMergeCaptured))).andReturn(true).times(1);
replayAll();
supervisor.runInternal();
verifyAll();
EasyMock.reset(taskStorage);
EasyMock.reset(taskClient);
EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.NOT_STARTED)).anyTimes();
EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString())).andReturn(Futures.immediateFuture(DateTimes.nowUtc())).anyTimes();
TreeMap<Integer, Map<String, String>> checkpointsGroup0 = new TreeMap<>();
checkpointsGroup0.put(0, ImmutableMap.of(SHARD_ID2, "0", SHARD_ID1, KinesisSequenceNumber.END_OF_SHARD_MARKER));
// there would be 1 tasks, 1 for each task group, but task group 1 only has closed shards, so no task is created
EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-0"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpointsGroup0)).times(1);
List<Task> postMergeTasks = postMergeCaptured.getValues();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(postMergeTasks).anyTimes();
for (Task task : postMergeTasks) {
EasyMock.expect(taskStorage.getStatus(task.getId())).andReturn(Optional.of(TaskStatus.running(task.getId()))).anyTimes();
EasyMock.expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
}
EasyMock.replay(taskStorage);
EasyMock.replay(taskClient);
supervisor.runInternal();
verifyAll();
// Check that shardId-000000000000 which has hit EOS is not included in the sequences sent to the task for group 0
SeekableStreamStartSequenceNumbers<String, String> group0ExpectedStartSequenceNumbers = new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID2, "0"), ImmutableSet.of());
SeekableStreamEndSequenceNumbers<String, String> group0ExpectedEndSequenceNumbers = new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID2, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER));
Assert.assertEquals(1, postMergeTasks.size());
KinesisIndexTaskIOConfig group0Config = ((KinesisIndexTask) postMergeTasks.get(0)).getIOConfig();
Assert.assertEquals((Integer) 0, group0Config.getTaskGroupId());
Assert.assertEquals(group0ExpectedStartSequenceNumbers, group0Config.getStartSequenceNumbers());
Assert.assertEquals(group0ExpectedEndSequenceNumbers, group0Config.getEndSequenceNumbers());
return postMergeTasks;
}
Aggregations