Use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.
From the class TaskQueue, method manageInternal().
@VisibleForTesting
void manageInternal() {
// Task futures available from the taskRunner
final Map<String, ListenableFuture<TaskStatus>> runnerTaskFutures = new HashMap<>();
for (final TaskRunnerWorkItem workItem : taskRunner.getKnownTasks()) {
runnerTaskFutures.put(workItem.getTaskId(), workItem.getResult());
}
// Copy tasks list, as notifyStatus may modify it.
for (final Task task : ImmutableList.copyOf(tasks)) {
if (!taskFutures.containsKey(task.getId())) {
final ListenableFuture<TaskStatus> runnerTaskFuture;
if (runnerTaskFutures.containsKey(task.getId())) {
runnerTaskFuture = runnerTaskFutures.get(task.getId());
} else {
// Task should be running, so run it.
final boolean taskIsReady;
try {
taskIsReady = task.isReady(taskActionClientFactory.create(task));
} catch (Exception e) {
log.warn(e, "Exception thrown during isReady for task: %s", task.getId());
final String errorMessage;
if (e instanceof MaxAllowedLocksExceededException) {
errorMessage = e.getMessage();
} else {
errorMessage = "Failed while waiting for the task to be ready to run. " + "See overlord logs for more details.";
}
notifyStatus(task, TaskStatus.failure(task.getId(), errorMessage), errorMessage);
continue;
}
if (taskIsReady) {
log.info("Asking taskRunner to run: %s", task.getId());
runnerTaskFuture = taskRunner.run(task);
} else {
// Task.isReady() can internally lock intervals or segments.
// We should release them if the task is not ready.
taskLockbox.unlockAll(task);
continue;
}
}
taskFutures.put(task.getId(), attachCallbacks(task, runnerTaskFuture));
} else if (isTaskPending(task)) {
// If taskFutures already contains this task and the task is still pending, ask the taskRunner
// to run it again so it is guaranteed to be assigned to run.
// see https://github.com/apache/druid/pull/6991
taskRunner.run(task);
}
}
// Kill tasks that shouldn't be running
final Set<String> knownTaskIds = tasks.stream().map(Task::getId).collect(Collectors.toSet());
final Set<String> tasksToKill = Sets.difference(runnerTaskFutures.keySet(), knownTaskIds);
if (!tasksToKill.isEmpty()) {
log.info("Asking taskRunner to clean up %,d tasks.", tasksToKill.size());
for (final String taskId : tasksToKill) {
try {
taskRunner.shutdown(taskId, "task is not in knownTaskIds[%s]", knownTaskIds);
} catch (Exception e) {
log.warn(e, "TaskRunner failed to clean up task: %s", taskId);
}
}
}
}
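At its core, manageInternal is a reconcile loop: start any queued task the runner does not yet know about, and shut down any runner task that is no longer in the queue. A minimal, self-contained sketch of that pattern, using hypothetical TaskLike and Runner interfaces in place of Druid's Task and TaskRunner:

import com.google.common.collect.Sets;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;

// Hypothetical stand-ins for Druid's Task and TaskRunner; illustration only.
interface TaskLike { String getId(); boolean isReady(); }

interface Runner {
  void run(TaskLike task);
  void shutdown(String taskId, String reason);
  Set<String> runningTaskIds();
}

final class ReconcileSketch {
  private final Map<String, Boolean> tracked = new ConcurrentHashMap<>();

  void reconcile(Set<TaskLike> knownTasks, Runner runner) {
    // 1. Launch known, ready tasks that are not yet tracked.
    for (TaskLike task : knownTasks) {
      if (!tracked.containsKey(task.getId()) && task.isReady()) {
        runner.run(task);
        tracked.put(task.getId(), Boolean.TRUE);
      }
    }
    // 2. Shut down runner tasks that are no longer in the known set.
    Set<String> knownIds = knownTasks.stream().map(TaskLike::getId).collect(Collectors.toSet());
    for (String staleId : Sets.difference(runner.runningTaskIds(), knownIds)) {
      runner.shutdown(staleId, "task is not in the known task set");
    }
  }
}

The real method additionally wraps isReady() in failure handling and releases locks via taskLockbox.unlockAll(); the sketch keeps only the two-way reconciliation.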
Use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.
From the class HttpRemoteTaskRunner, method scheduleCompletedTaskStatusCleanupFromZk().
private void scheduleCompletedTaskStatusCleanupFromZk() {
if (cf == null) {
return;
}
zkCleanupExec.scheduleAtFixedRate(() -> {
try {
List<String> workers;
try {
workers = cf.getChildren().forPath(indexerZkConfig.getStatusPath());
} catch (KeeperException.NoNodeException e) {
// statusPath doesn't exist yet; can occur if no middleManagers have started.
workers = ImmutableList.of();
}
Set<String> knownActiveTaskIds = new HashSet<>();
if (!workers.isEmpty()) {
for (Task task : taskStorage.getActiveTasks()) {
knownActiveTaskIds.add(task.getId());
}
}
for (String workerId : workers) {
String workerStatusPath = JOINER.join(indexerZkConfig.getStatusPath(), workerId);
List<String> taskIds;
try {
taskIds = cf.getChildren().forPath(workerStatusPath);
} catch (KeeperException.NoNodeException e) {
taskIds = ImmutableList.of();
}
for (String taskId : taskIds) {
if (!knownActiveTaskIds.contains(taskId)) {
String taskStatusPath = JOINER.join(workerStatusPath, taskId);
try {
cf.delete().guaranteed().forPath(taskStatusPath);
} catch (KeeperException.NoNodeException e) {
log.info("Failed to delete taskStatusPath[%s].", taskStatusPath);
}
}
}
}
} catch (InterruptedException ex) {
Thread.currentThread().interrupt();
} catch (Exception ex) {
log.error(ex, "Unknown error while doing task status cleanup in ZK.");
}
}, 1, 5, TimeUnit.MINUTES);
}
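The same cleanup shape can be reproduced with nothing but Curator and a scheduled executor: list the status znodes, compare against an externally supplied set of live task IDs, and delete the rest. A sketch under those assumptions (the status path layout and the liveTaskIds supplier are hypothetical):

import org.apache.curator.framework.CuratorFramework;
import org.apache.zookeeper.KeeperException;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;

final class ZkStatusCleanup {
  private final CuratorFramework cf;
  private final String statusPath;                 // e.g. "/druid/indexer/status" (hypothetical)
  private final Supplier<Set<String>> liveTaskIds; // caller decides what counts as "live"
  private final ScheduledExecutorService exec = Executors.newSingleThreadScheduledExecutor();

  ZkStatusCleanup(CuratorFramework cf, String statusPath, Supplier<Set<String>> liveTaskIds) {
    this.cf = cf;
    this.statusPath = statusPath;
    this.liveTaskIds = liveTaskIds;
  }

  void start() {
    exec.scheduleAtFixedRate(this::cleanupOnce, 1, 5, TimeUnit.MINUTES);
  }

  private void cleanupOnce() {
    try {
      List<String> workers;
      try {
        workers = cf.getChildren().forPath(statusPath);
      } catch (KeeperException.NoNodeException e) {
        return; // nothing published yet
      }
      Set<String> live = liveTaskIds.get();
      for (String worker : workers) {
        String workerPath = statusPath + "/" + worker;
        List<String> taskIds;
        try {
          taskIds = cf.getChildren().forPath(workerPath);
        } catch (KeeperException.NoNodeException e) {
          continue; // worker node vanished between listing and reading
        }
        for (String taskId : taskIds) {
          if (!live.contains(taskId)) {
            try {
              cf.delete().guaranteed().forPath(workerPath + "/" + taskId);
            } catch (KeeperException.NoNodeException ignored) {
              // already gone; nothing to do
            }
          }
        }
      }
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
    } catch (Exception e) {
      // swallow and retry on the next period, mirroring the method above
    }
  }
}

The one-minute initial delay and five-minute period mirror the scheduleAtFixedRate call in the original method.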
Use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.
From the class SeekableStreamSupervisor, method updatePartitionDataFromStream().
private boolean updatePartitionDataFromStream() {
List<PartitionIdType> previousPartitionIds = new ArrayList<>(partitionIds);
Set<PartitionIdType> partitionIdsFromSupplier;
recordSupplierLock.lock();
try {
partitionIdsFromSupplier = recordSupplier.getPartitionIds(ioConfig.getStream());
if (shouldSkipIgnorablePartitions()) {
partitionIdsFromSupplier.removeAll(computeIgnorablePartitionIds());
}
} catch (Exception e) {
stateManager.recordThrowableEvent(e);
log.warn("Could not fetch partitions for topic/stream [%s]: %s", ioConfig.getStream(), e.getMessage());
log.debug(e, "full stack trace");
return false;
} finally {
recordSupplierLock.unlock();
}
if (partitionIdsFromSupplier == null || partitionIdsFromSupplier.size() == 0) {
String errMsg = StringUtils.format("No partitions found for stream [%s]", ioConfig.getStream());
stateManager.recordThrowableEvent(new StreamException(new ISE(errMsg)));
log.warn(errMsg);
return false;
}
log.debug("Found [%d] partitions for stream [%s]", partitionIdsFromSupplier.size(), ioConfig.getStream());
Map<PartitionIdType, SequenceOffsetType> storedMetadata = getOffsetsFromMetadataStorage();
Set<PartitionIdType> storedPartitions = storedMetadata.keySet();
Set<PartitionIdType> closedPartitions = storedMetadata.entrySet().stream().filter(x -> isEndOfShard(x.getValue())).map(Entry::getKey).collect(Collectors.toSet());
Set<PartitionIdType> previouslyExpiredPartitions = storedMetadata.entrySet().stream().filter(x -> isShardExpirationMarker(x.getValue())).map(Entry::getKey).collect(Collectors.toSet());
Set<PartitionIdType> partitionIdsFromSupplierWithoutPreviouslyExpiredPartitions = Sets.difference(partitionIdsFromSupplier, previouslyExpiredPartitions);
Set<PartitionIdType> activePartitionsIdsFromSupplier = Sets.difference(partitionIdsFromSupplierWithoutPreviouslyExpiredPartitions, closedPartitions);
Set<PartitionIdType> newlyClosedPartitions = Sets.intersection(closedPartitions, new HashSet<>(previousPartitionIds));
log.debug("active partitions from supplier: " + activePartitionsIdsFromSupplier);
if (partitionIdsFromSupplierWithoutPreviouslyExpiredPartitions.size() != partitionIdsFromSupplier.size()) {
// this should never happen, but we check for it and exclude the expired partitions if they somehow reappear
log.warn("Previously expired partitions [%s] were present in the current list [%s] from the record supplier.", previouslyExpiredPartitions, partitionIdsFromSupplier);
}
if (activePartitionsIdsFromSupplier.size() == 0) {
String errMsg = StringUtils.format("No active partitions found for stream [%s] after removing closed and previously expired partitions", ioConfig.getStream());
stateManager.recordThrowableEvent(new StreamException(new ISE(errMsg)));
log.warn(errMsg);
return false;
}
boolean initialPartitionDiscovery = this.partitionIds.isEmpty();
for (PartitionIdType partitionId : partitionIdsFromSupplierWithoutPreviouslyExpiredPartitions) {
if (closedPartitions.contains(partitionId)) {
log.info("partition [%s] is closed and has no more data, skipping.", partitionId);
continue;
}
if (!this.partitionIds.contains(partitionId)) {
partitionIds.add(partitionId);
if (!initialPartitionDiscovery) {
subsequentlyDiscoveredPartitions.add(partitionId);
}
}
}
// If the stream supports partition expiration, clean up closed and expired partitions so they
// no longer influence how the supervisor assigns partitions across tasks.
if (supportsPartitionExpiration()) {
cleanupClosedAndExpiredPartitions(storedPartitions, newlyClosedPartitions, activePartitionsIdsFromSupplier, previouslyExpiredPartitions, partitionIdsFromSupplier);
}
Int2ObjectMap<List<PartitionIdType>> newlyDiscovered = new Int2ObjectLinkedOpenHashMap<>();
for (PartitionIdType partitionId : activePartitionsIdsFromSupplier) {
int taskGroupId = getTaskGroupIdForPartition(partitionId);
Set<PartitionIdType> partitionGroup = partitionGroups.computeIfAbsent(taskGroupId, k -> new HashSet<>());
partitionGroup.add(partitionId);
if (partitionOffsets.putIfAbsent(partitionId, getNotSetMarker()) == null) {
log.debug("New partition [%s] discovered for stream [%s], added to task group [%d]", partitionId, ioConfig.getStream(), taskGroupId);
newlyDiscovered.computeIfAbsent(taskGroupId, k -> new ArrayList<>()).add(partitionId);
}
}
if (newlyDiscovered.size() > 0) {
for (Int2ObjectMap.Entry<List<PartitionIdType>> taskGroupPartitions : newlyDiscovered.int2ObjectEntrySet()) {
log.info("New partitions %s discovered for stream [%s], added to task group [%s]", taskGroupPartitions.getValue(), ioConfig.getStream(), taskGroupPartitions.getIntKey());
}
}
if (!partitionIds.equals(previousPartitionIds)) {
assignRecordSupplierToPartitionIds();
// The partition set has changed: ask actively reading tasks to stop early so the supervisor can
// adapt to repartitioning quickly by creating new tasks.
for (TaskGroup taskGroup : activelyReadingTaskGroups.values()) {
if (!taskGroup.taskIds().isEmpty()) {
// Partitions have changed and we are managing active tasks - set an early publish time
// at the current time + repartitionTransitionDuration.
// This allows time for the stream to start writing to the new partitions after repartitioning.
// For Kinesis ingestion, this cooldown time is particularly useful, lowering the possibility of
// the new shards being empty, which can cause issues presently
// (see https://github.com/apache/druid/issues/7600)
earlyStopTime = DateTimes.nowUtc().plus(tuningConfig.getRepartitionTransitionDuration());
log.info("Previous partition set [%s] has changed to [%s] - requesting that tasks stop after [%s] at [%s]", previousPartitionIds, partitionIds, tuningConfig.getRepartitionTransitionDuration(), earlyStopTime);
break;
}
}
}
return true;
}
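Most of the bookkeeping above is set arithmetic over the supplier's partition list and the stored offsets whose sentinel values mark a partition as closed or expired. A small sketch of just that classification, with String partition IDs and hypothetical CLOSED/EXPIRED markers standing in for Druid's typed end-of-shard and expiration sentinels:

import com.google.common.collect.Sets;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

final class PartitionClassifier {
  // Hypothetical sentinel offsets; Druid uses type-specific end-of-shard / expiration markers.
  static final String CLOSED = "CLOSED";
  static final String EXPIRED = "EXPIRED";

  static Set<String> activePartitions(Set<String> fromSupplier, Map<String, String> storedOffsets) {
    Set<String> closed = storedOffsets.entrySet().stream()
        .filter(e -> CLOSED.equals(e.getValue()))
        .map(Map.Entry::getKey)
        .collect(Collectors.toSet());
    Set<String> previouslyExpired = storedOffsets.entrySet().stream()
        .filter(e -> EXPIRED.equals(e.getValue()))
        .map(Map.Entry::getKey)
        .collect(Collectors.toSet());
    // Drop partitions that already expired, then drop the ones marked closed.
    Set<String> withoutExpired = Sets.difference(fromSupplier, previouslyExpired);
    return Sets.difference(withoutExpired, closed);
  }
}

The rest of the method (task-group assignment and the early stop on repartitioning) operates on the resulting active set.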
Use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.
From the class SeekableStreamSupervisor, method discoverTasks().
private void discoverTasks() throws ExecutionException, InterruptedException, TimeoutException {
int taskCount = 0;
List<String> futureTaskIds = new ArrayList<>();
List<ListenableFuture<Boolean>> futures = new ArrayList<>();
List<Task> tasks = taskStorage.getActiveTasksByDatasource(dataSource);
final Map<Integer, TaskGroup> taskGroupsToVerify = new HashMap<>();
for (Task task : tasks) {
if (!doesTaskTypeMatchSupervisor(task)) {
continue;
}
taskCount++;
@SuppressWarnings("unchecked") final SeekableStreamIndexTask<PartitionIdType, SequenceOffsetType, RecordType> seekableStreamIndexTask = (SeekableStreamIndexTask<PartitionIdType, SequenceOffsetType, RecordType>) task;
final String taskId = task.getId();
// Kill any discovered task that still reads from expired partitions, to avoid having to map
// expired partitions which are no longer tracked in partitionIds to a task group.
if (supportsPartitionExpiration()) {
Set<PartitionIdType> taskPartitions = seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().keySet();
Set<PartitionIdType> inactivePartitionsInTask = Sets.difference(taskPartitions, new HashSet<>(partitionIds));
if (!inactivePartitionsInTask.isEmpty()) {
killTaskWithSuccess(taskId, "Task [%s] with partition set [%s] has inactive partitions [%s], stopping task.", taskId, taskPartitions, inactivePartitionsInTask);
continue;
}
}
// Determine which task group this task belongs to based on one of the partitions handled by this task. If we
// later determine that this task is actively reading, we will make sure that it matches our current partition
// allocation (getTaskGroupIdForPartition(partition) should return the same value for every partition being read
// by this task) and kill it if it is not compatible. If the task is instead found to be in the publishing
// state, we will permit it to complete even if it doesn't match our current partition allocation to support
// seamless schema migration.
Iterator<PartitionIdType> it = seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().keySet().iterator();
final Integer taskGroupId = (it.hasNext() ? getTaskGroupIdForPartition(it.next()) : null);
if (taskGroupId != null) {
// check to see if we already know about this task, either in [activelyReadingTaskGroups] or in [pendingCompletionTaskGroups]
// and if not add it to activelyReadingTaskGroups or pendingCompletionTaskGroups (if status = PUBLISHING)
TaskGroup taskGroup = activelyReadingTaskGroups.get(taskGroupId);
if (!isTaskInPendingCompletionGroups(taskId) && (taskGroup == null || !taskGroup.tasks.containsKey(taskId))) {
futureTaskIds.add(taskId);
futures.add(Futures.transform(taskClient.getStatusAsync(taskId), new Function<SeekableStreamIndexTaskRunner.Status, Boolean>() {
@Override
public Boolean apply(SeekableStreamIndexTaskRunner.Status status) {
try {
log.debug("Task [%s], status [%s]", taskId, status);
if (status == SeekableStreamIndexTaskRunner.Status.PUBLISHING) {
seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().keySet().forEach(partition -> addDiscoveredTaskToPendingCompletionTaskGroups(getTaskGroupIdForPartition(partition), taskId, seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap()));
// update partitionGroups with the publishing task's sequences (if they are greater than what is
// existing) so that the next tasks will start reading from where this task left off
Map<PartitionIdType, SequenceOffsetType> publishingTaskEndOffsets = taskClient.getEndOffsets(taskId);
// If we received invalid endOffset values, we clear the known offset to refetch the last committed offset
// from metadata. If any endOffset values are invalid, we treat the entire set as invalid as a safety measure.
boolean endOffsetsAreInvalid = false;
for (Entry<PartitionIdType, SequenceOffsetType> entry : publishingTaskEndOffsets.entrySet()) {
PartitionIdType partition = entry.getKey();
SequenceOffsetType sequence = entry.getValue();
if (sequence.equals(getEndOfPartitionMarker())) {
log.info("Got end of partition marker for partition [%s] from task [%s] in discoverTasks, clearing partition offset to refetch from metadata..", taskId, partition);
endOffsetsAreInvalid = true;
partitionOffsets.put(partition, getNotSetMarker());
}
}
if (!endOffsetsAreInvalid) {
for (Entry<PartitionIdType, SequenceOffsetType> entry : publishingTaskEndOffsets.entrySet()) {
PartitionIdType partition = entry.getKey();
SequenceOffsetType sequence = entry.getValue();
boolean succeeded;
do {
succeeded = true;
SequenceOffsetType previousOffset = partitionOffsets.putIfAbsent(partition, sequence);
if (previousOffset != null && (makeSequenceNumber(previousOffset).compareTo(makeSequenceNumber(sequence))) < 0) {
succeeded = partitionOffsets.replace(partition, previousOffset, sequence);
}
} while (!succeeded);
}
}
} else {
for (PartitionIdType partition : seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().keySet()) {
if (!taskGroupId.equals(getTaskGroupIdForPartition(partition))) {
log.warn("Stopping task [%s] which does not match the expected partition allocation", taskId);
try {
stopTask(taskId, false).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
} catch (InterruptedException | ExecutionException | TimeoutException e) {
stateManager.recordThrowableEvent(e);
log.warn(e, "Exception while stopping task");
}
return false;
}
}
// If the task is current, create the corresponding taskGroup only if it does not already exist.
if (!isTaskCurrent(taskGroupId, taskId)) {
log.info("Stopping task [%s] which does not match the expected parameters and ingestion spec", taskId);
try {
stopTask(taskId, false).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
} catch (InterruptedException | ExecutionException | TimeoutException e) {
stateManager.recordThrowableEvent(e);
log.warn(e, "Exception while stopping task");
}
return false;
} else {
final TaskGroup taskGroup = activelyReadingTaskGroups.computeIfAbsent(taskGroupId, k -> {
log.info("Creating a new task group for taskGroupId[%d]", taskGroupId);
// Reuse the discovered task's base sequence name for the new task group so that the replica segment allocations are the same.
return new TaskGroup(taskGroupId, ImmutableMap.copyOf(seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap()), null, seekableStreamIndexTask.getIOConfig().getMinimumMessageTime(), seekableStreamIndexTask.getIOConfig().getMaximumMessageTime(), seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getExclusivePartitions(), seekableStreamIndexTask.getIOConfig().getBaseSequenceName());
});
taskGroupsToVerify.put(taskGroupId, taskGroup);
final TaskData prevTaskData = taskGroup.tasks.putIfAbsent(taskId, new TaskData());
if (prevTaskData != null) {
throw new ISE("taskGroup[%s] already exists for new task[%s]", prevTaskData, taskId);
}
verifySameSequenceNameForAllTasksInGroup(taskGroupId);
}
}
return true;
} catch (Throwable t) {
stateManager.recordThrowableEvent(t);
log.error(t, "Something bad while discovering task [%s]", taskId);
return null;
}
}
}, workerExec));
}
}
}
List<Boolean> results = Futures.successfulAsList(futures).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
for (int i = 0; i < results.size(); i++) {
if (results.get(i) == null) {
String taskId = futureTaskIds.get(i);
killTask(taskId, "Task [%s] failed to return status, killing task", taskId);
}
}
log.debug("Found [%d] seekablestream indexing tasks for dataSource [%s]", taskCount, dataSource);
// make sure the checkpoints are consistent with each other and with the metadata store
verifyAndMergeCheckpoints(taskGroupsToVerify.values());
}
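The putIfAbsent/replace retry in the publishing branch is a standard lock-free "keep the larger value" merge into a ConcurrentMap. Isolated, with Long offsets standing in for the supervisor's generic sequence type, it reduces to:

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

final class MaxOffsetMerger {
  private final ConcurrentMap<String, Long> offsets = new ConcurrentHashMap<>();

  /** Record the offset for a partition, keeping the larger of the stored and incoming values. */
  void mergeMax(String partition, long incoming) {
    boolean succeeded;
    do {
      succeeded = true;
      Long previous = offsets.putIfAbsent(partition, incoming);
      if (previous != null && previous < incoming) {
        // Another thread may race us here, so replace() can fail and we retry.
        succeeded = offsets.replace(partition, previous, incoming);
      }
    } while (!succeeded);
  }
}

On Java 8+, offsets.merge(partition, incoming, Math::max) achieves the same result in a single atomic call; the explicit loop mirrors the structure used in the supervisor.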
Use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.
From the class SegmentAllocateActionTest, method testSameIntervalWithSegmentGranularity().
@Test
public void testSameIntervalWithSegmentGranularity() {
final Task task = NoopTask.create();
taskActionTestKit.getTaskLockbox().add(task);
Granularity segmentGranularity = new PeriodGranularity(Period.hours(1), null, DateTimes.inferTzFromString("Asia/Shanghai"));
final SegmentIdWithShardSpec id1 = allocate(task, PARTY_TIME, Granularities.MINUTE, segmentGranularity, "s1", null);
final SegmentIdWithShardSpec id2 = allocate(task, PARTY_TIME, Granularities.MINUTE, segmentGranularity, "s2", null);
Assert.assertNotNull(id1);
Assert.assertNotNull(id2);
}
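The test allocates two segments for the same interval under an hourly granularity anchored to Asia/Shanghai, using distinct sequence names, and asserts that both allocations return a segment identifier. The time-zone-aware hour bucketing that such a granularity performs can be illustrated with plain Joda-Time (a conceptual sketch, not Druid's PeriodGranularity implementation):

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

final class HourBucketSketch {
  public static void main(String[] args) {
    DateTimeZone shanghai = DateTimeZone.forID("Asia/Shanghai");
    DateTime timestamp = new DateTime("2023-01-01T12:34:56Z").withZone(shanghai);
    // Floor to the start of the hour in the target zone, as an hourly granularity would.
    DateTime bucketStart = timestamp.hourOfDay().roundFloorCopy();
    System.out.println(bucketStart); // 2023-01-01T20:00:00.000+08:00
  }
}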