Use of org.apache.druid.indexing.seekablestream.SeekableStreamIndexTask in project druid by druid-io.
The class SeekableStreamSupervisor, method discoverTasks:
private void discoverTasks() throws ExecutionException, InterruptedException, TimeoutException {
int taskCount = 0;
List<String> futureTaskIds = new ArrayList<>();
List<ListenableFuture<Boolean>> futures = new ArrayList<>();
List<Task> tasks = taskStorage.getActiveTasksByDatasource(dataSource);
final Map<Integer, TaskGroup> taskGroupsToVerify = new HashMap<>();
for (Task task : tasks) {
if (!doesTaskTypeMatchSupervisor(task)) {
continue;
}
taskCount++;
@SuppressWarnings("unchecked") final SeekableStreamIndexTask<PartitionIdType, SequenceOffsetType, RecordType> seekableStreamIndexTask = (SeekableStreamIndexTask<PartitionIdType, SequenceOffsetType, RecordType>) task;
final String taskId = task.getId();
// If the stream supports partition expiration, kill any task that reads partitions no longer tracked in partitionIds;
// this avoids having to map expired partitions to a task group.
if (supportsPartitionExpiration()) {
Set<PartitionIdType> taskPartitions = seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().keySet();
Set<PartitionIdType> inactivePartitionsInTask = Sets.difference(taskPartitions, new HashSet<>(partitionIds));
if (!inactivePartitionsInTask.isEmpty()) {
killTaskWithSuccess(taskId, "Task [%s] with partition set [%s] has inactive partitions [%s], stopping task.", taskId, taskPartitions, inactivePartitionsInTask);
continue;
}
}
// Determine which task group this task belongs to based on one of the partitions handled by this task. If we
// later determine that this task is actively reading, we will make sure that it matches our current partition
// allocation (getTaskGroupIdForPartition(partition) should return the same value for every partition being read
// by this task) and kill it if it is not compatible. If the task is instead found to be in the publishing
// state, we will permit it to complete even if it doesn't match our current partition allocation to support
// seamless schema migration.
Iterator<PartitionIdType> it = seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().keySet().iterator();
final Integer taskGroupId = (it.hasNext() ? getTaskGroupIdForPartition(it.next()) : null);
if (taskGroupId != null) {
// check to see if we already know about this task, either in [activelyReadingTaskGroups] or in [pendingCompletionTaskGroups]
// and if not add it to activelyReadingTaskGroups or pendingCompletionTaskGroups (if status = PUBLISHING)
TaskGroup taskGroup = activelyReadingTaskGroups.get(taskGroupId);
if (!isTaskInPendingCompletionGroups(taskId) && (taskGroup == null || !taskGroup.tasks.containsKey(taskId))) {
futureTaskIds.add(taskId);
futures.add(Futures.transform(taskClient.getStatusAsync(taskId), new Function<SeekableStreamIndexTaskRunner.Status, Boolean>() {
@Override
public Boolean apply(SeekableStreamIndexTaskRunner.Status status) {
try {
log.debug("Task [%s], status [%s]", taskId, status);
if (status == SeekableStreamIndexTaskRunner.Status.PUBLISHING) {
seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().keySet().forEach(partition -> addDiscoveredTaskToPendingCompletionTaskGroups(getTaskGroupIdForPartition(partition), taskId, seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap()));
// update partitionGroups with the publishing task's sequences (if they are greater than what is
// existing) so that the next tasks will start reading from where this task left off
Map<PartitionIdType, SequenceOffsetType> publishingTaskEndOffsets = taskClient.getEndOffsets(taskId);
// If we received invalid endOffset values, we clear the known offset to refetch the last committed offset
// from metadata. If any endOffset values are invalid, we treat the entire set as invalid as a safety measure.
boolean endOffsetsAreInvalid = false;
for (Entry<PartitionIdType, SequenceOffsetType> entry : publishingTaskEndOffsets.entrySet()) {
PartitionIdType partition = entry.getKey();
SequenceOffsetType sequence = entry.getValue();
if (sequence.equals(getEndOfPartitionMarker())) {
log.info("Got end of partition marker for partition [%s] from task [%s] in discoverTasks, clearing partition offset to refetch from metadata..", taskId, partition);
endOffsetsAreInvalid = true;
partitionOffsets.put(partition, getNotSetMarker());
}
}
if (!endOffsetsAreInvalid) {
for (Entry<PartitionIdType, SequenceOffsetType> entry : publishingTaskEndOffsets.entrySet()) {
PartitionIdType partition = entry.getKey();
SequenceOffsetType sequence = entry.getValue();
boolean succeeded;
do {
succeeded = true;
SequenceOffsetType previousOffset = partitionOffsets.putIfAbsent(partition, sequence);
if (previousOffset != null && (makeSequenceNumber(previousOffset).compareTo(makeSequenceNumber(sequence))) < 0) {
succeeded = partitionOffsets.replace(partition, previousOffset, sequence);
}
} while (!succeeded);
}
}
} else {
for (PartitionIdType partition : seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().keySet()) {
if (!taskGroupId.equals(getTaskGroupIdForPartition(partition))) {
log.warn("Stopping task [%s] which does not match the expected partition allocation", taskId);
try {
stopTask(taskId, false).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
} catch (InterruptedException | ExecutionException | TimeoutException e) {
stateManager.recordThrowableEvent(e);
log.warn(e, "Exception while stopping task");
}
return false;
}
}
// If the task is current, create the corresponding taskGroup only if it does not already exist.
if (!isTaskCurrent(taskGroupId, taskId)) {
log.info("Stopping task [%s] which does not match the expected parameters and ingestion spec", taskId);
try {
stopTask(taskId, false).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
} catch (InterruptedException | ExecutionException | TimeoutException e) {
stateManager.recordThrowableEvent(e);
log.warn(e, "Exception while stopping task");
}
return false;
} else {
final TaskGroup taskGroup = activelyReadingTaskGroups.computeIfAbsent(taskGroupId, k -> {
log.info("Creating a new task group for taskGroupId[%d]", taskGroupId);
// Reuse the discovered task's start sequences and base sequence name for the new task group so that the replica segment allocations are the same.
return new TaskGroup(taskGroupId, ImmutableMap.copyOf(seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap()), null, seekableStreamIndexTask.getIOConfig().getMinimumMessageTime(), seekableStreamIndexTask.getIOConfig().getMaximumMessageTime(), seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getExclusivePartitions(), seekableStreamIndexTask.getIOConfig().getBaseSequenceName());
});
taskGroupsToVerify.put(taskGroupId, taskGroup);
final TaskData prevTaskData = taskGroup.tasks.putIfAbsent(taskId, new TaskData());
if (prevTaskData != null) {
throw new ISE("taskGroup[%s] already exists for new task[%s]", prevTaskData, taskId);
}
verifySameSequenceNameForAllTasksInGroup(taskGroupId);
}
}
return true;
} catch (Throwable t) {
stateManager.recordThrowableEvent(t);
log.error(t, "Something bad while discovering task [%s]", taskId);
return null;
}
}
}, workerExec));
}
}
}
List<Boolean> results = Futures.successfulAsList(futures).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
for (int i = 0; i < results.size(); i++) {
if (results.get(i) == null) {
String taskId = futureTaskIds.get(i);
killTask(taskId, "Task [%s] failed to return status, killing task", taskId);
}
}
log.debug("Found [%d] seekablestream indexing tasks for dataSource [%s]", taskCount, dataSource);
// make sure the checkpoints are consistent with each other and with the metadata store
verifyAndMergeCheckpoints(taskGroupsToVerify.values());
}
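The do/while loop above that advances partitionOffsets is a lock-free pattern for monotonically raising a value in a ConcurrentHashMap: putIfAbsent covers the missing-entry case, and replace acts as a compare-and-swap that fails (and forces a retry) if another thread changed the value in between. A minimal standalone sketch of the same pattern, using plain Integer/Long types and hypothetical names rather than the supervisor's generic sequence types:

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

public class OffsetAdvanceSketch
{
  // Hypothetical stand-in for the supervisor's partitionOffsets map (partition id -> offset).
  private final ConcurrentMap<Integer, Long> partitionOffsets = new ConcurrentHashMap<>();

  // Raise the stored offset for a partition to newOffset, never lowering it.
  // putIfAbsent() handles the "no entry yet" case; replace() only succeeds if the value
  // is still the one we read, otherwise the loop retries.
  public void advanceOffset(int partition, long newOffset)
  {
    boolean succeeded;
    do {
      succeeded = true;
      Long previous = partitionOffsets.putIfAbsent(partition, newOffset);
      if (previous != null && previous < newOffset) {
        succeeded = partitionOffsets.replace(partition, previous, newOffset);
      }
    } while (!succeeded);
  }

  public static void main(String[] args)
  {
    OffsetAdvanceSketch sketch = new OffsetAdvanceSketch();
    sketch.advanceOffset(0, 100L);
    sketch.advanceOffset(0, 50L);   // ignored: lower than the stored offset
    sketch.advanceOffset(0, 200L);  // raises the stored offset
    System.out.println(sketch.partitionOffsets);
  }
}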
Use of org.apache.druid.indexing.seekablestream.SeekableStreamIndexTask in project druid by druid-io.
The class KinesisSupervisor, method createIndexTasks:
@Override
protected List<SeekableStreamIndexTask<String, String, ByteEntity>> createIndexTasks(int replicas, String baseSequenceName, ObjectMapper sortingMapper, TreeMap<Integer, Map<String, String>> sequenceOffsets, SeekableStreamIndexTaskIOConfig taskIoConfig, SeekableStreamIndexTaskTuningConfig taskTuningConfig, RowIngestionMetersFactory rowIngestionMetersFactory) throws JsonProcessingException {
final String checkpoints = sortingMapper.writerFor(CHECKPOINTS_TYPE_REF).writeValueAsString(sequenceOffsets);
final Map<String, Object> context = createBaseTaskContexts();
context.put(CHECKPOINTS_CTX_KEY, checkpoints);
List<SeekableStreamIndexTask<String, String, ByteEntity>> taskList = new ArrayList<>();
for (int i = 0; i < replicas; i++) {
String taskId = IdUtils.getRandomIdWithPrefix(baseSequenceName);
taskList.add(new KinesisIndexTask(taskId, new TaskResource(baseSequenceName, 1), spec.getDataSchema(), (KinesisIndexTaskTuningConfig) taskTuningConfig, (KinesisIndexTaskIOConfig) taskIoConfig, context, awsCredentialsConfig));
}
return taskList;
}
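The only Kinesis-specific parts of this override are the casts to the Kinesis IO and tuning configs and the AWS credentials passed to each KinesisIndexTask; the checkpoint handling is the shared template. A minimal sketch of how the sequenceOffsets TreeMap ends up as a JSON string in the task context (the real method uses sortingMapper.writerFor(CHECKPOINTS_TYPE_REF); a plain writeValueAsString and a literal "checkpoints" key are stand-ins here, and the shard and sequence values are made up):

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;

public class CheckpointContextSketch
{
  public static void main(String[] args) throws JsonProcessingException
  {
    ObjectMapper mapper = new ObjectMapper();

    // checkpoint id -> (shard id -> sequence number); values are illustrative only.
    TreeMap<Integer, Map<String, String>> sequenceOffsets = new TreeMap<>();
    Map<String, String> shardOffsets = new HashMap<>();
    shardOffsets.put("shardId-000000000000", "49590338271490256608559692538361571095921575989136588898");
    sequenceOffsets.put(0, shardOffsets);

    String checkpoints = mapper.writeValueAsString(sequenceOffsets);

    // "checkpoints" is a stand-in for the real CHECKPOINTS_CTX_KEY constant.
    Map<String, Object> context = new HashMap<>();
    context.put("checkpoints", checkpoints);

    System.out.println(context);
  }
}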
Use of org.apache.druid.indexing.seekablestream.SeekableStreamIndexTask in project druid by druid-io.
The class KafkaSupervisor, method createIndexTasks:
@Override
protected List<SeekableStreamIndexTask<Integer, Long, KafkaRecordEntity>> createIndexTasks(int replicas, String baseSequenceName, ObjectMapper sortingMapper, TreeMap<Integer, Map<Integer, Long>> sequenceOffsets, SeekableStreamIndexTaskIOConfig taskIoConfig, SeekableStreamIndexTaskTuningConfig taskTuningConfig, RowIngestionMetersFactory rowIngestionMetersFactory) throws JsonProcessingException {
final String checkpoints = sortingMapper.writerFor(CHECKPOINTS_TYPE_REF).writeValueAsString(sequenceOffsets);
final Map<String, Object> context = createBaseTaskContexts();
context.put(CHECKPOINTS_CTX_KEY, checkpoints);
// Kafka index task always uses incremental handoff since 0.16.0.
// The context entry below is only needed for compatibility if you want to downgrade your cluster to a version earlier than 0.16.0;
// without it, the Kafka index task would pick up LegacyKafkaIndexTaskRunner.
context.put("IS_INCREMENTAL_HANDOFF_SUPPORTED", true);
List<SeekableStreamIndexTask<Integer, Long, KafkaRecordEntity>> taskList = new ArrayList<>();
for (int i = 0; i < replicas; i++) {
String taskId = IdUtils.getRandomIdWithPrefix(baseSequenceName);
taskList.add(new KafkaIndexTask(taskId, new TaskResource(baseSequenceName, 1), spec.getDataSchema(), (KafkaIndexTaskTuningConfig) taskTuningConfig, (KafkaIndexTaskIOConfig) taskIoConfig, context, sortingMapper));
}
return taskList;
}
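The Kafka override follows the same template, differing only in the partition and offset types (Integer and Long instead of Kinesis shard-id and sequence-number strings) and the extra incremental-handoff context flag. Each replica gets a unique task id but shares baseSequenceName, which is also used as the TaskResource availability group so replicas are not co-located on the same worker. A small sketch of that replica naming, with a hypothetical stand-in for IdUtils.getRandomIdWithPrefix and a made-up base sequence name:

import java.util.ArrayList;
import java.util.List;
import java.util.UUID;

public class ReplicaTaskNamingSketch
{
  // Hypothetical stand-in for IdUtils.getRandomIdWithPrefix: prefix plus a random suffix.
  private static String randomIdWithPrefix(String prefix)
  {
    return prefix + "_" + UUID.randomUUID().toString().substring(0, 8);
  }

  public static void main(String[] args)
  {
    String baseSequenceName = "index_kafka_wikipedia_abc123";  // illustrative name
    int replicas = 2;

    List<String> taskIds = new ArrayList<>();
    for (int i = 0; i < replicas; i++) {
      // Every replica gets a unique task id, while all replicas share baseSequenceName,
      // which doubles as the availability group so they land on different workers.
      taskIds.add(randomIdWithPrefix(baseSequenceName));
    }

    taskIds.forEach(System.out::println);
  }
}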
Use of org.apache.druid.indexing.seekablestream.SeekableStreamIndexTask in project druid by druid-io.
The class SeekableStreamSupervisor, method createTasksForGroup:
private void createTasksForGroup(int groupId, int replicas) throws JsonProcessingException {
TaskGroup group = activelyReadingTaskGroups.get(groupId);
Map<PartitionIdType, SequenceOffsetType> startPartitions = group.startingSequences;
Map<PartitionIdType, SequenceOffsetType> endPartitions = new HashMap<>();
for (PartitionIdType partition : startPartitions.keySet()) {
endPartitions.put(partition, getEndOfPartitionMarker());
}
Set<PartitionIdType> exclusiveStartSequenceNumberPartitions = activelyReadingTaskGroups.get(groupId).exclusiveStartSequenceNumberPartitions;
DateTime minimumMessageTime = group.minimumMessageTime.orNull();
DateTime maximumMessageTime = group.maximumMessageTime.orNull();
SeekableStreamIndexTaskIOConfig newIoConfig = createTaskIoConfig(groupId, startPartitions, endPartitions, group.baseSequenceName, minimumMessageTime, maximumMessageTime, exclusiveStartSequenceNumberPartitions, ioConfig);
List<SeekableStreamIndexTask<PartitionIdType, SequenceOffsetType, RecordType>> taskList = createIndexTasks(replicas, group.baseSequenceName, sortingMapper, group.checkpointSequences, newIoConfig, taskTuningConfig, rowIngestionMetersFactory);
for (SeekableStreamIndexTask indexTask : taskList) {
Optional<TaskQueue> taskQueue = taskMaster.getTaskQueue();
if (taskQueue.isPresent()) {
try {
taskQueue.get().add(indexTask);
} catch (EntryExistsException e) {
stateManager.recordThrowableEvent(e);
log.error("Tried to add task [%s] but it already exists", indexTask.getId());
}
} else {
log.error("Failed to get task queue because I'm not the leader!");
}
}
}
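Note that createTasksForGroup opens every partition with getEndOfPartitionMarker() as the end offset, so newly created tasks read without a fixed upper bound until the supervisor later sets concrete end offsets (for example, when the task's duration elapses). A minimal sketch of that end-offset map construction, using Long.MAX_VALUE as an illustrative stand-in for the marker and made-up start offsets:

import java.util.HashMap;
import java.util.Map;

public class EndOffsetMarkerSketch
{
  // Illustrative stand-in for getEndOfPartitionMarker(): effectively "read without an upper bound".
  private static final long END_OF_PARTITION_MARKER = Long.MAX_VALUE;

  public static void main(String[] args)
  {
    Map<Integer, Long> startPartitions = new HashMap<>();
    startPartitions.put(0, 1250L);
    startPartitions.put(1, 980L);

    Map<Integer, Long> endPartitions = new HashMap<>();
    for (Integer partition : startPartitions.keySet()) {
      // Every partition starts with an unbounded end offset; real end offsets are assigned later.
      endPartitions.put(partition, END_OF_PARTITION_MARKER);
    }

    System.out.println("start=" + startPartitions + ", end=" + endPartitions);
  }
}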