Example use of org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers in project druid by druid-io.
Class KinesisIndexTaskTest, method testMultipleParseExceptionsFailure.
/**
 * Runs a Kinesis index task with {@code maxParseExceptions = 2} and asserts that it ends in
 * {@link TaskState#FAILED}, publishes no segments or metadata, and reports three processed rows,
 * three unparseable rows and the two saved parse exceptions.
 */
@Test(timeout = 120_000L)
public void testMultipleParseExceptionsFailure() throws Exception {
  reportParseExceptions = false;
  maxParseExceptions = 2;
  maxSavedParseExceptions = 2;

  // Record the record-supplier expectations: a single poll() and a single close(),
  // everything else is allowed any number of times.
  recordSupplier.assign(EasyMock.anyObject());
  EasyMock.expectLastCall().anyTimes();
  EasyMock.expect(recordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
  recordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
  EasyMock.expectLastCall().anyTimes();
  EasyMock.expect(recordSupplier.poll(EasyMock.anyLong())).andReturn(generateRecords(2, 13)).once();
  recordSupplier.close();
  EasyMock.expectLastCall().once();
  replayAll();

  // The task reads SHARD_ID1 from sequence "2" up to sequence "9".
  final KinesisIndexTaskIOConfig ioConfig = new KinesisIndexTaskIOConfig(
      0,
      "sequence0",
      new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "2"), ImmutableSet.of()),
      new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "9")),
      true,
      null,
      null,
      INPUT_FORMAT,
      "awsEndpoint",
      null,
      null,
      null,
      null,
      false
  );
  final KinesisIndexTask task = createTask(null, ioConfig);

  // Blocks until the task has exited.
  final TaskStatus status = runTask(task).get();

  Assert.assertEquals(TaskState.FAILED, status.getStatusCode());
  verifyAll();
  IndexTaskTest.checkTaskStatusErrorMsgForParseExceptionsExceeded(status);

  // Row counters exposed by the runner.
  Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getProcessed());
  Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getProcessedWithError());
  Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getUnparseable());
  Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getThrownAway());

  // Nothing may have been published.
  Assert.assertEquals(ImmutableList.of(), publishedDescriptors());
  Assert.assertNull(newDataSchemaMetadata());

  // The task report must carry the same counters.
  final IngestionStatsAndErrorsTaskReportData report = getTaskReportData();
  final Map<String, Object> wantedRowStats = ImmutableMap.of(
      RowIngestionMeters.BUILD_SEGMENTS,
      ImmutableMap.of(
          RowIngestionMeters.PROCESSED, 3,
          RowIngestionMeters.PROCESSED_WITH_ERROR, 0,
          RowIngestionMeters.UNPARSEABLE, 3,
          RowIngestionMeters.THROWN_AWAY, 0
      )
  );
  Assert.assertEquals(wantedRowStats, report.getRowStats());

  // Saved parse exceptions: compare the first "details" entry and the raw "input" of each one.
  final List<LinkedHashMap> savedParseErrors =
      (List<LinkedHashMap>) report.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);

  final List<String> wantedDetails = Arrays.asList(
      "Unable to parse [] as the intermediateRow resulted in empty input row (Record: 1)",
      "Unable to parse row [unparseable] (Record: 1)"
  );
  final List<String> gotDetails = savedParseErrors
      .stream()
      .map(entry -> ((List<String>) entry.get("details")).get(0))
      .collect(Collectors.toList());
  Assert.assertEquals(wantedDetails, gotDetails);

  final List<String> wantedInputs = Arrays.asList("", "unparseable");
  final List<String> gotInputs = savedParseErrors
      .stream()
      .map(entry -> (String) entry.get("input"))
      .collect(Collectors.toList());
  Assert.assertEquals(wantedInputs, gotInputs);
}
Example use of org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers in project druid by druid-io.
Class KafkaSupervisorTest, method testCreateBaseTaskContexts.
/**
 * Creates one index task through the supervisor and asserts that the task context contains
 * {@code "IS_INCREMENTAL_HANDOFF_SUPPORTED"} set to {@code true}.
 */
@Test
public void testCreateBaseTaskContexts() throws JsonProcessingException {
  supervisor = getTestableSupervisor(1, 1, true, "PT1H", null, null);

  // IO config with empty start/end sequence maps for stream "test".
  final KafkaIndexTaskIOConfig ioConfig = new KafkaIndexTaskIOConfig(
      0,
      "seq",
      new SeekableStreamStartSequenceNumbers<>("test", Collections.emptyMap(), Collections.emptySet()),
      new SeekableStreamEndSequenceNumbers<>("test", Collections.emptyMap()),
      Collections.emptyMap(),
      null,
      null,
      null,
      null,
      INPUT_FORMAT
  );

  // Tuning config with every constructor argument left null.
  final KafkaIndexTaskTuningConfig tuningConfig = new KafkaIndexTaskTuningConfig(
      null, null, null, null, null,
      null, null, null, null, null,
      null, null, null, null, null,
      null, null, null, null
  );

  // Only the context of the first created task is inspected.
  final Map<String, Object> taskContext = supervisor
      .createIndexTasks(1, "seq", OBJECT_MAPPER, new TreeMap<>(), ioConfig, tuningConfig, null)
      .get(0)
      .getContext();

  final Boolean incrementalHandoff = (Boolean) taskContext.get("IS_INCREMENTAL_HANDOFF_SUPPORTED");
  Assert.assertNotNull(incrementalHandoff);
  Assert.assertTrue(incrementalHandoff);
}
Example use of org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers in project druid by druid-io.
Class KinesisSupervisor, method createDataSourceMetadataWithClosedOrExpiredPartitions.
/**
 * Builds new Kinesis datasource metadata from {@code currentMetadata} in which every partition
 * listed in {@code terminatedPartitionIds} has its sequence number replaced by
 * {@code terminationMarker}; all other partitions keep their current sequence number.
 *
 * <p>When the current sequence numbers are start sequence numbers, the terminated partitions are
 * also dropped from the set of exclusive start partitions. Otherwise end sequence numbers are
 * produced.
 *
 * @param currentMetadata        metadata to copy from; cast to {@link KinesisDataSourceMetadata}
 * @param terminatedPartitionIds ids of the partitions to mark
 * @param terminationMarker      value stored as the sequence number of each marked partition
 * @return a new {@link KinesisDataSourceMetadata} wrapping the rewritten sequence numbers
 */
private SeekableStreamDataSourceMetadata<String, String> createDataSourceMetadataWithClosedOrExpiredPartitions(
    SeekableStreamDataSourceMetadata<String, String> currentMetadata,
    Set<String> terminatedPartitionIds,
    String terminationMarker
) {
  final SeekableStreamSequenceNumbers<String, String> previous =
      ((KinesisDataSourceMetadata) currentMetadata).getSeekableStreamSequenceNumbers();

  // Rebuild the partition -> sequence map, swapping in the marker for terminated partitions.
  final Map<String, String> rewrittenSequences = new HashMap<>();
  for (Map.Entry<String, String> current : previous.getPartitionSequenceNumberMap().entrySet()) {
    final String partitionId = current.getKey();
    rewrittenSequences.put(
        partitionId,
        terminatedPartitionIds.contains(partitionId) ? terminationMarker : current.getValue()
    );
  }

  final SeekableStreamSequenceNumbers<String, String> rewritten;
  if (previous instanceof SeekableStreamStartSequenceNumbers) {
    // Terminated partitions no longer belong in the exclusive-start set.
    final Set<String> keptExclusive = new HashSet<>();
    final Set<String> previousExclusive =
        ((SeekableStreamStartSequenceNumbers<String, String>) previous).getExclusivePartitions();
    for (String partitionId : previousExclusive) {
      if (terminatedPartitionIds.contains(partitionId)) {
        continue;
      }
      keptExclusive.add(partitionId);
    }
    rewritten = new SeekableStreamStartSequenceNumbers<>(
        previous.getStream(),
        null,
        rewrittenSequences,
        null,
        keptExclusive
    );
  } else {
    rewritten = new SeekableStreamEndSequenceNumbers<>(
        previous.getStream(),
        null,
        rewrittenSequences,
        null
    );
  }

  return new KinesisDataSourceMetadata(rewritten);
}
Example use of org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers in project druid by druid-io.
Class KinesisSupervisorTest, method testCheckpointForInactiveTaskGroup.
/**
 * Registers three Kinesis index tasks with the mocked task storage, starts and runs the
 * supervisor once, moves task group 0 to pending completion, and then submits a checkpoint for
 * that group. The test passes when the notice queue drains and the service emitter has recorded
 * no stack trace, exception message or exception class.
 */
@Test(timeout = 60_000L)
public void testCheckpointForInactiveTaskGroup() throws InterruptedException {
supervisor = getTestableSupervisor(2, 1, true, "PT1S", null, null, false);
// not adding any events
final KinesisIndexTask id1;
// id1 starts both shards at "0" with no exclusive partitions; id2 and id3 below start at
// "10"/"20" with both shards marked exclusive. All three have no end sequence number.
id1 = createKinesisIndexTask("id1", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>("stream", ImmutableMap.of(SHARD_ID1, "0", SHARD_ID0, "0"), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>("stream", ImmutableMap.of(SHARD_ID1, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER, SHARD_ID0, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER)), null, null);
final Task id2 = createKinesisIndexTask("id2", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>("stream", ImmutableMap.of(SHARD_ID1, "10", SHARD_ID0, "20"), ImmutableSet.of(SHARD_ID0, SHARD_ID1)), new SeekableStreamEndSequenceNumbers<>("stream", ImmutableMap.of(SHARD_ID1, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER, SHARD_ID0, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER)), null, null);
final Task id3 = createKinesisIndexTask("id3", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>("stream", ImmutableMap.of(SHARD_ID1, "10", SHARD_ID0, "20"), ImmutableSet.of(SHARD_ID0, SHARD_ID1)), new SeekableStreamEndSequenceNumbers<>("stream", ImmutableMap.of(SHARD_ID1, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER, SHARD_ID0, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER)), null, null);
final TaskLocation location1 = new TaskLocation("testHost", 1234, -1);
final TaskLocation location2 = new TaskLocation("testHost2", 145, -1);
// Work items returned by the mocked taskRunner.getRunningTasks().
Collection workItems = new ArrayList<>();
workItems.add(new TestTaskRunnerWorkItem(id1, null, location1));
workItems.add(new TestTaskRunnerWorkItem(id2, null, location2));
// NOTE(review): id2 is wrapped a second time here and id3 never is — this looks like a
// copy-paste slip; confirm whether this entry was meant to be id3 before changing it.
workItems.add(new TestTaskRunnerWorkItem(id2, null, location2));
// Record-supplier stubs; every one of them may be invoked any number of times.
supervisorRecordSupplier.assign(EasyMock.anyObject());
EasyMock.expectLastCall().anyTimes();
EasyMock.expect(supervisorRecordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID1, SHARD_ID0)).anyTimes();
EasyMock.expect(supervisorRecordSupplier.getAssignment()).andReturn(ImmutableSet.of(SHARD1_PARTITION, SHARD0_PARTITION)).anyTimes();
supervisorRecordSupplier.seekToLatest(EasyMock.anyObject());
EasyMock.expectLastCall().anyTimes();
EasyMock.expect(supervisorRecordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(SHARD1_PARTITION)).andReturn("12").anyTimes();
EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(SHARD0_PARTITION)).andReturn("1").anyTimes();
supervisorRecordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
EasyMock.expectLastCall().anyTimes();
// Task runner / task master / task storage stubs: all three tasks are active and running.
EasyMock.expect(taskRunner.getRunningTasks()).andReturn(workItems).anyTimes();
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of(id1, id2, id3)).anyTimes();
EasyMock.expect(taskStorage.getStatus("id1")).andReturn(Optional.of(TaskStatus.running("id1"))).anyTimes();
EasyMock.expect(taskStorage.getStatus("id2")).andReturn(Optional.of(TaskStatus.running("id2"))).anyTimes();
EasyMock.expect(taskStorage.getStatus("id3")).andReturn(Optional.of(TaskStatus.running("id3"))).anyTimes();
EasyMock.expect(taskStorage.getTask("id1")).andReturn(Optional.of(id1)).anyTimes();
EasyMock.expect(taskStorage.getTask("id2")).andReturn(Optional.of(id2)).anyTimes();
EasyMock.expect(taskStorage.getTask("id3")).andReturn(Optional.of(id3)).anyTimes();
// The metadata store returns datasource metadata wrapping null sequence numbers.
EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KinesisDataSourceMetadata(null)).anyTimes();
// Unlike the stubs above, the task-client expectations below are recorded without anyTimes(),
// so verifyAll() also checks how often each one was called.
EasyMock.expect(taskClient.getStatusAsync("id1")).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.READING));
EasyMock.expect(taskClient.getStatusAsync("id2")).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.READING));
EasyMock.expect(taskClient.getStatusAsync("id3")).andReturn(Futures.immediateFuture(SeekableStreamIndexTaskRunner.Status.READING));
final DateTime startTime = DateTimes.nowUtc();
EasyMock.expect(taskClient.getStartTimeAsync("id1")).andReturn(Futures.immediateFuture(startTime));
EasyMock.expect(taskClient.getStartTimeAsync("id2")).andReturn(Futures.immediateFuture(startTime));
EasyMock.expect(taskClient.getStartTimeAsync("id3")).andReturn(Futures.immediateFuture(startTime));
// Every task reports the same single checkpoint (sequence id 0).
final TreeMap<Integer, Map<String, String>> checkpoints = new TreeMap<>();
checkpoints.put(0, ImmutableMap.of(SHARD_ID1, "10", SHARD_ID0, "20"));
EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("id1"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints)).times(1);
EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("id2"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints)).times(1);
EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("id3"), EasyMock.anyBoolean())).andReturn(Futures.immediateFuture(checkpoints)).times(1);
taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
replayAll();
supervisor.start();
supervisor.runInternal();
// Make group 0 inactive, then checkpoint it; every checkpointed shard is passed as exclusive.
supervisor.moveTaskGroupToPendingCompletion(0);
supervisor.checkpoint(0, new KinesisDataSourceMetadata(new SeekableStreamStartSequenceNumbers<>(STREAM, checkpoints.get(0), checkpoints.get(0).keySet())));
// Poll until the supervisor's notice queue is empty.
while (supervisor.getNoticesQueueSize() > 0) {
Thread.sleep(100);
}
verifyAll();
// No failure may have been emitted; the emitted text doubles as the assertion message.
Assert.assertNull(serviceEmitter.getStackTrace(), serviceEmitter.getStackTrace());
Assert.assertNull(serviceEmitter.getExceptionMessage(), serviceEmitter.getExceptionMessage());
Assert.assertNull(serviceEmitter.getExceptionClass());
}
Example use of org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers in project druid by druid-io.
Class KinesisSupervisorTest, method testCheckpointForUnknownTaskGroup.
/**
 * Starts the supervisor (without calling {@code runInternal()}) and submits a checkpoint for
 * task group 0. The test expects the service emitter to record an {@link ISE} whose message is
 * "Cannot find taskGroup [0] among all activelyReadingTaskGroups [{}]".
 */
@Test(timeout = 60_000L)
public void testCheckpointForUnknownTaskGroup() throws InterruptedException {
supervisor = getTestableSupervisor(2, 1, true, "PT1S", null, null, false);
// Record-supplier stubs; every one of them may be invoked any number of times.
supervisorRecordSupplier.assign(EasyMock.anyObject());
EasyMock.expectLastCall().anyTimes();
EasyMock.expect(supervisorRecordSupplier.getPartitionIds(STREAM)).andReturn(ImmutableSet.of(SHARD_ID1, SHARD_ID0)).anyTimes();
EasyMock.expect(supervisorRecordSupplier.getAssignment()).andReturn(ImmutableSet.of(SHARD1_PARTITION, SHARD0_PARTITION)).anyTimes();
supervisorRecordSupplier.seekToLatest(EasyMock.anyObject());
EasyMock.expectLastCall().anyTimes();
EasyMock.expect(supervisorRecordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(SHARD1_PARTITION)).andReturn("12").anyTimes();
EasyMock.expect(supervisorRecordSupplier.getLatestSequenceNumber(SHARD0_PARTITION)).andReturn("1").anyTimes();
supervisorRecordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
EasyMock.expectLastCall().anyTimes();
// not adding any events
final KinesisIndexTask id1 = createKinesisIndexTask("id1", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>("stream", ImmutableMap.of(SHARD_ID1, "0", SHARD_ID0, "0"), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>("stream", ImmutableMap.of(SHARD_ID1, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER, SHARD_ID0, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER)), null, null);
final Task id2 = createKinesisIndexTask("id2", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>("stream", ImmutableMap.of(SHARD_ID1, "10", SHARD_ID0, "20"), ImmutableSet.of(SHARD_ID0, SHARD_ID1)), new SeekableStreamEndSequenceNumbers<>("stream", ImmutableMap.of(SHARD_ID1, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER, SHARD_ID0, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER)), null, null);
final Task id3 = createKinesisIndexTask("id3", DATASOURCE, 0, new SeekableStreamStartSequenceNumbers<>("stream", ImmutableMap.of(SHARD_ID1, "10", SHARD_ID0, "20"), ImmutableSet.of(SHARD_ID0, SHARD_ID1)), new SeekableStreamEndSequenceNumbers<>("stream", ImmutableMap.of(SHARD_ID1, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER, SHARD_ID0, KinesisSequenceNumber.NO_END_SEQUENCE_NUMBER)), null, null);
// Task master / task storage stubs: all three tasks are active and running.
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of(id1, id2, id3)).anyTimes();
EasyMock.expect(taskStorage.getStatus("id1")).andReturn(Optional.of(TaskStatus.running("id1"))).anyTimes();
EasyMock.expect(taskStorage.getStatus("id2")).andReturn(Optional.of(TaskStatus.running("id2"))).anyTimes();
EasyMock.expect(taskStorage.getStatus("id3")).andReturn(Optional.of(TaskStatus.running("id3"))).anyTimes();
EasyMock.expect(taskStorage.getTask("id1")).andReturn(Optional.of(id1)).anyTimes();
EasyMock.expect(taskStorage.getTask("id2")).andReturn(Optional.of(id2)).anyTimes();
EasyMock.expect(taskStorage.getTask("id3")).andReturn(Optional.of(id3)).anyTimes();
// The metadata store returns datasource metadata wrapping null sequence numbers.
EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE)).andReturn(new KinesisDataSourceMetadata(null)).anyTimes();
replayAll();
supervisor.start();
// Checkpoint group 0 with empty sequence numbers; runInternal() was never called beforehand.
supervisor.checkpoint(0, new KinesisDataSourceMetadata(new SeekableStreamStartSequenceNumbers<>(STREAM, Collections.emptyMap(), ImmutableSet.of())));
// Poll until the supervisor's notice queue is empty.
while (supervisor.getNoticesQueueSize() > 0) {
Thread.sleep(100);
}
verifyAll();
// Poll until the emitter has recorded a stack trace. This loop has no exit of its own; only
// the 60 second @Test timeout bounds it.
while (serviceEmitter.getStackTrace() == null) {
Thread.sleep(100);
}
Assert.assertTrue(serviceEmitter.getStackTrace().startsWith("org.apache.druid.java.util.common.ISE: Cannot find"));
Assert.assertEquals("Cannot find taskGroup [0] among all activelyReadingTaskGroups [{}]", serviceEmitter.getExceptionMessage());
Assert.assertEquals(ISE.class, serviceEmitter.getExceptionClass());
}
Aggregations