Use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.
The class KafkaSupervisor, method updatePartitionLagFromStream.
@Override
protected void updatePartitionLagFromStream() {
  getRecordSupplierLock().lock();
  try {
    Set<Integer> partitionIds;
    try {
      partitionIds = recordSupplier.getPartitionIds(getIoConfig().getStream());
    } catch (Exception e) {
      log.warn("Could not fetch partitions for topic/stream [%s]", getIoConfig().getStream());
      throw new StreamException(e);
    }

    Set<StreamPartition<Integer>> partitions = partitionIds
        .stream()
        .map(e -> new StreamPartition<>(getIoConfig().getStream(), e))
        .collect(Collectors.toSet());

    recordSupplier.seekToLatest(partitions);

    // This method isn't actually computing the lag, just fetching the latest offsets from the stream. This is
    // because we currently only have record lag for Kafka, which can be lazily computed by subtracting the highest
    // task offsets from the latest offsets from the stream when it is needed.
    latestSequenceFromStream = partitions
        .stream()
        .collect(Collectors.toMap(StreamPartition::getPartitionId, recordSupplier::getPosition));
  } catch (InterruptedException e) {
    throw new StreamException(e);
  } finally {
    getRecordSupplierLock().unlock();
  }
}
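As the comment notes, this method only snapshots the latest stream offsets; the record lag itself is derived later by subtracting the highest offsets read by the tasks from that snapshot. Below is a minimal sketch of that subtraction. The name latestSequenceFromStream mirrors the field above, while highestCurrentOffsets is an assumed name for the per-partition offsets reported by the tasks; this is an illustration, not the supervisor's actual lag method.

import java.util.HashMap;
import java.util.Map;

// Per-partition lag = latest offset seen on the stream - highest offset consumed by the tasks.
static Map<Integer, Long> computeRecordLag(
    Map<Integer, Long> latestSequenceFromStream,
    Map<Integer, Long> highestCurrentOffsets
) {
  Map<Integer, Long> lag = new HashMap<>();
  for (Map.Entry<Integer, Long> entry : latestSequenceFromStream.entrySet()) {
    Long consumed = highestCurrentOffsets.get(entry.getKey());
    // If no task has read from this partition yet, treat its consumed offset as 0.
    long consumedOffset = consumed != null ? consumed : 0L;
    lag.put(entry.getKey(), Math.max(0L, entry.getValue() - consumedOffset));
  }
  return lag;
}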
Use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.
The class KafkaIndexTaskTest, method testSerde.
@Test
public void testSerde() throws Exception {
  // This is both a serde test and a regression test for https://github.com/apache/druid/issues/7724.
  final KafkaIndexTask task = createTask(
      "taskid",
      NEW_DATA_SCHEMA.withTransformSpec(
          new TransformSpec(null, ImmutableList.of(new ExpressionTransform("beep", "nofunc()", ExprMacroTable.nil())))
      ),
      new KafkaIndexTaskIOConfig(
          0,
          "sequence",
          new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(), ImmutableSet.of()),
          new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of()),
          ImmutableMap.of(),
          KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS,
          true,
          null,
          null,
          INPUT_FORMAT
      )
  );
  final Task task1 = OBJECT_MAPPER.readValue(OBJECT_MAPPER.writeValueAsBytes(task), Task.class);
  Assert.assertEquals(task, task1);
}
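The round trip here is the standard Jackson pattern: serialize the task, deserialize it against the polymorphic Task base type, and compare with equals(). A small generic sketch of the same check follows; assertRoundTrips is a hypothetical helper, and the mapper passed in would need the same module configuration as the test's OBJECT_MAPPER for Druid types to round-trip.

import java.io.IOException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.Assert;

// Generic serde round-trip check; relies on the type implementing a meaningful equals().
static <T> void assertRoundTrips(ObjectMapper mapper, T value, Class<T> clazz) throws IOException {
  byte[] bytes = mapper.writeValueAsBytes(value);
  T deserialized = mapper.readValue(bytes, clazz);
  Assert.assertEquals(value, deserialized);
}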
Use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.
The class KinesisIndexTaskTest, method testMultipleParseExceptionsFailure.
@Test(timeout = 120_000L)
public void testMultipleParseExceptionsFailure() throws Exception {
  reportParseExceptions = false;
  maxParseExceptions = 2;
  maxSavedParseExceptions = 2;

  recordSupplier.assign(EasyMock.anyObject());
  EasyMock.expectLastCall().anyTimes();
  EasyMock.expect(recordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
  recordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
  EasyMock.expectLastCall().anyTimes();
  EasyMock.expect(recordSupplier.poll(EasyMock.anyLong())).andReturn(generateRecords(2, 13)).once();
  recordSupplier.close();
  EasyMock.expectLastCall().once();
  replayAll();

  final KinesisIndexTask task = createTask(
      null,
      new KinesisIndexTaskIOConfig(
          0, "sequence0",
          new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "2"), ImmutableSet.of()),
          new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "9")),
          true, null, null, INPUT_FORMAT, "awsEndpoint", null, null, null, null, false
      )
  );
  final ListenableFuture<TaskStatus> future = runTask(task);

  // Wait for task to exit
  TaskStatus status = future.get();
  Assert.assertEquals(TaskState.FAILED, status.getStatusCode());
  verifyAll();
  IndexTaskTest.checkTaskStatusErrorMsgForParseExceptionsExceeded(status);

  // Check metrics
  Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getProcessed());
  Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getProcessedWithError());
  Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getUnparseable());
  Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getThrownAway());

  // Check published metadata
  Assert.assertEquals(ImmutableList.of(), publishedDescriptors());
  Assert.assertNull(newDataSchemaMetadata());

  IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData();
  Map<String, Object> expectedMetrics = ImmutableMap.of(
      RowIngestionMeters.BUILD_SEGMENTS,
      ImmutableMap.of(
          RowIngestionMeters.PROCESSED, 3,
          RowIngestionMeters.PROCESSED_WITH_ERROR, 0,
          RowIngestionMeters.UNPARSEABLE, 3,
          RowIngestionMeters.THROWN_AWAY, 0
      )
  );
  Assert.assertEquals(expectedMetrics, reportData.getRowStats());

  List<LinkedHashMap> parseExceptionReports =
      (List<LinkedHashMap>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);

  List<String> expectedMessages = Arrays.asList(
      "Unable to parse [] as the intermediateRow resulted in empty input row (Record: 1)",
      "Unable to parse row [unparseable] (Record: 1)"
  );
  List<String> actualMessages = parseExceptionReports
      .stream()
      .map(r -> ((List<String>) r.get("details")).get(0))
      .collect(Collectors.toList());
  Assert.assertEquals(expectedMessages, actualMessages);

  List<String> expectedInputs = Arrays.asList("", "unparseable");
  List<String> actualInputs = parseExceptionReports
      .stream()
      .map(r -> (String) r.get("input"))
      .collect(Collectors.toList());
  Assert.assertEquals(expectedInputs, actualInputs);
}
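The test encodes the threshold behaviour the task settings imply: with maxParseExceptions = 2, the third unparseable record pushes the task over the limit and it fails, and with maxSavedParseExceptions = 2 only the two most recent parse errors survive into the task report (which is why exactly two messages are asserted). A rough sketch of that bookkeeping is shown below; the class and method names are assumptions for illustration, not Druid's real implementation.

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;

// Illustrative parse-exception accounting: fail once the limit is exceeded, keep only the last N messages.
class ParseExceptionTracker {
  private final int maxParseExceptions;
  private final int maxSavedParseExceptions;
  private final Deque<String> savedMessages = new ArrayDeque<>();
  private int unparseableCount = 0;

  ParseExceptionTracker(int maxParseExceptions, int maxSavedParseExceptions) {
    this.maxParseExceptions = maxParseExceptions;
    this.maxSavedParseExceptions = maxSavedParseExceptions;
  }

  /** Returns true if the task should fail because too many rows could not be parsed. */
  boolean recordUnparseable(String message) {
    unparseableCount++;
    savedMessages.addLast(message);
    if (savedMessages.size() > maxSavedParseExceptions) {
      savedMessages.removeFirst();  // keep only the most recent messages for the report
    }
    return unparseableCount > maxParseExceptions;
  }

  List<String> savedMessages() {
    return new ArrayList<>(savedMessages);
  }
}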
Use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.
The class KafkaSupervisorTest, method testKillUnresponsiveTasksWhileSettingEndOffsets.
@Test
public void testKillUnresponsiveTasksWhileSettingEndOffsets() throws Exception {
  final TaskLocation location = new TaskLocation("testHost", 1234, -1);
  supervisor = getTestableSupervisor(2, 2, true, "PT1M", null, null);
  addSomeEvents(100);

  Capture<Task> captured = Capture.newInstance(CaptureType.ALL);
  EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
  EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
  EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
  EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
  EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE))
          .andReturn(new KafkaDataSourceMetadata(null))
          .anyTimes();
  EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true).times(4);
  taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
  replayAll();

  supervisor.start();
  supervisor.runInternal();
  verifyAll();

  List<Task> tasks = captured.getValues();
  Collection workItems = new ArrayList<>();
  for (Task task : tasks) {
    workItems.add(new TestTaskRunnerWorkItem(task, null, location));
  }

  EasyMock.reset(taskStorage, taskRunner, taskClient, taskQueue);

  TreeMap<Integer, Map<Integer, Long>> checkpoints1 = new TreeMap<>();
  checkpoints1.put(0, ImmutableMap.of(0, 0L, 2, 0L));
  TreeMap<Integer, Map<Integer, Long>> checkpoints2 = new TreeMap<>();
  checkpoints2.put(0, ImmutableMap.of(1, 0L));
  EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-0"), EasyMock.anyBoolean()))
          .andReturn(Futures.immediateFuture(checkpoints1))
          .times(2);
  EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-1"), EasyMock.anyBoolean()))
          .andReturn(Futures.immediateFuture(checkpoints2))
          .times(2);

  captured = Capture.newInstance(CaptureType.ALL);
  EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(tasks).anyTimes();
  for (Task task : tasks) {
    EasyMock.expect(taskStorage.getStatus(task.getId()))
            .andReturn(Optional.of(TaskStatus.running(task.getId())))
            .anyTimes();
    EasyMock.expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
  }
  EasyMock.expect(taskRunner.getRunningTasks()).andReturn(workItems).anyTimes();
  EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString()))
          .andReturn(Futures.immediateFuture(Status.READING))
          .anyTimes();
  EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.contains("sequenceName-0")))
          .andReturn(Futures.immediateFuture(DateTimes.nowUtc().minusMinutes(2)))
          .andReturn(Futures.immediateFuture(DateTimes.nowUtc()));
  EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.contains("sequenceName-1")))
          .andReturn(Futures.immediateFuture(DateTimes.nowUtc()))
          .times(2);
  EasyMock.expect(taskClient.pauseAsync(EasyMock.contains("sequenceName-0")))
          .andReturn(Futures.immediateFuture(ImmutableMap.of(0, 10L, 1, 20L, 2, 30L)))
          .andReturn(Futures.immediateFuture(ImmutableMap.of(0, 10L, 1, 15L, 2, 35L)));
  EasyMock.expect(taskClient.setEndOffsetsAsync(
      EasyMock.contains("sequenceName-0"),
      EasyMock.eq(ImmutableMap.of(0, 10L, 1, 20L, 2, 35L)),
      EasyMock.eq(true)
  )).andReturn(Futures.immediateFailedFuture(new RuntimeException())).times(2);
  taskQueue.shutdown(
      EasyMock.contains("sequenceName-0"),
      EasyMock.eq("Task [%s] failed to respond to [set end offsets] in a timely manner, killing task"),
      EasyMock.contains("sequenceName-0")
  );
  EasyMock.expectLastCall().times(2);
  EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true).times(2);
  EasyMock.replay(taskStorage, taskRunner, taskClient, taskQueue);

  supervisor.runInternal();
  verifyAll();

  for (Task task : captured.getValues()) {
    KafkaIndexTaskIOConfig taskConfig = ((KafkaIndexTask) task).getIOConfig();
    Assert.assertEquals(0L, (long) taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(0));
    Assert.assertEquals(0L, (long) taskConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(2));
  }
}
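The mocked pause responses show how the supervisor reconciles replicas before setting end offsets: for each partition it takes the highest offset reported by any replica, so {0: 10, 1: 20, 2: 30} and {0: 10, 1: 15, 2: 35} merge into the {0: 10, 1: 20, 2: 35} map expected by setEndOffsetsAsync above. A small sketch of that merge follows; the helper name and shape are assumptions for illustration, not the supervisor's internal code.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Merge per-replica pause results into a single end-offset map, keeping the largest offset per partition.
static Map<Integer, Long> mergeHighestOffsets(List<Map<Integer, Long>> replicaOffsets) {
  Map<Integer, Long> endOffsets = new HashMap<>();
  for (Map<Integer, Long> offsets : replicaOffsets) {
    for (Map.Entry<Integer, Long> entry : offsets.entrySet()) {
      endOffsets.merge(entry.getKey(), entry.getValue(), Math::max);
    }
  }
  return endOffsets;
}

With the two pause results above, this merge produces the same map the test passes to EasyMock.eq(ImmutableMap.of(0, 10L, 1, 20L, 2, 35L)).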
Use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.
The class KafkaSupervisorTest, method testRequeueTaskWhenFailed.
@Test
public void testRequeueTaskWhenFailed() throws Exception {
  supervisor = getTestableSupervisor(2, 2, true, "PT1H", null, null);
  addSomeEvents(1);

  Capture<Task> captured = Capture.newInstance(CaptureType.ALL);
  EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
  EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes();
  EasyMock.expect(taskRunner.getRunningTasks()).andReturn(Collections.emptyList()).anyTimes();
  EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(ImmutableList.of()).anyTimes();
  EasyMock.expect(taskClient.getStatusAsync(EasyMock.anyString()))
          .andReturn(Futures.immediateFuture(Status.NOT_STARTED))
          .anyTimes();
  EasyMock.expect(taskClient.getStartTimeAsync(EasyMock.anyString()))
          .andReturn(Futures.immediateFuture(DateTimes.nowUtc()))
          .anyTimes();
  EasyMock.expect(indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(DATASOURCE))
          .andReturn(new KafkaDataSourceMetadata(null))
          .anyTimes();
  EasyMock.expect(taskQueue.add(EasyMock.capture(captured))).andReturn(true).times(4);

  TreeMap<Integer, Map<Integer, Long>> checkpoints1 = new TreeMap<>();
  checkpoints1.put(0, ImmutableMap.of(0, 0L, 2, 0L));
  TreeMap<Integer, Map<Integer, Long>> checkpoints2 = new TreeMap<>();
  checkpoints2.put(0, ImmutableMap.of(1, 0L));
  EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-0"), EasyMock.anyBoolean()))
          .andReturn(Futures.immediateFuture(checkpoints1))
          .anyTimes();
  EasyMock.expect(taskClient.getCheckpointsAsync(EasyMock.contains("sequenceName-1"), EasyMock.anyBoolean()))
          .andReturn(Futures.immediateFuture(checkpoints2))
          .anyTimes();

  taskRunner.registerListener(EasyMock.anyObject(TaskRunnerListener.class), EasyMock.anyObject(Executor.class));
  replayAll();

  supervisor.start();
  supervisor.runInternal();
  verifyAll();

  List<Task> tasks = captured.getValues();

  // test that running the main loop again checks the status of the tasks that were created and does nothing if they
  // are all still running
  EasyMock.reset(taskStorage);
  EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(tasks).anyTimes();
  for (Task task : tasks) {
    EasyMock.expect(taskStorage.getStatus(task.getId()))
            .andReturn(Optional.of(TaskStatus.running(task.getId())))
            .anyTimes();
    EasyMock.expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
  }
  EasyMock.replay(taskStorage);

  supervisor.runInternal();
  verifyAll();

  // test that a task failing causes a new task to be re-queued with the same parameters
  Capture<Task> aNewTaskCapture = Capture.newInstance();
  List<Task> imStillAlive = tasks.subList(0, 3);
  KafkaIndexTask iHaveFailed = (KafkaIndexTask) tasks.get(3);
  EasyMock.reset(taskStorage);
  EasyMock.reset(taskQueue);
  EasyMock.expect(taskStorage.getActiveTasksByDatasource(DATASOURCE)).andReturn(imStillAlive).anyTimes();
  for (Task task : imStillAlive) {
    EasyMock.expect(taskStorage.getStatus(task.getId()))
            .andReturn(Optional.of(TaskStatus.running(task.getId())))
            .anyTimes();
    EasyMock.expect(taskStorage.getTask(task.getId())).andReturn(Optional.of(task)).anyTimes();
  }
  EasyMock.expect(taskStorage.getStatus(iHaveFailed.getId()))
          .andReturn(Optional.of(TaskStatus.failure(iHaveFailed.getId(), "Dummy task status failure err message")));
  EasyMock.expect(taskStorage.getTask(iHaveFailed.getId())).andReturn(Optional.of(iHaveFailed)).anyTimes();
  EasyMock.expect(taskQueue.add(EasyMock.capture(aNewTaskCapture))).andReturn(true);
  EasyMock.replay(taskStorage);
  EasyMock.replay(taskQueue);

  supervisor.runInternal();
  verifyAll();

  Assert.assertNotEquals(iHaveFailed.getId(), aNewTaskCapture.getValue().getId());
  Assert.assertEquals(
      iHaveFailed.getIOConfig().getBaseSequenceName(),
      ((KafkaIndexTask) aNewTaskCapture.getValue()).getIOConfig().getBaseSequenceName()
  );
}
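The closing assertions pin down the requeue invariant: the replacement is a different task (new id) but carries the same base sequence name, so it takes over the failed task's partition group. A compact helper expressing that invariant is sketched below; assertIsReplacementFor is a hypothetical name, the test asserts the same thing inline above.

import org.junit.Assert;

// Hypothetical helper: verifies a replacement Kafka task keeps the failed task's group identity.
static void assertIsReplacementFor(KafkaIndexTask failed, KafkaIndexTask replacement) {
  Assert.assertNotEquals(failed.getId(), replacement.getId());
  Assert.assertEquals(
      failed.getIOConfig().getBaseSequenceName(),
      replacement.getIOConfig().getBaseSequenceName()
  );
}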