Search in sources :

Example 6 with Task

use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.

the class TaskQueue method manageInternal.

/**
 * Performs one management pass over the queue.
 *
 * <p>The pass has two phases:
 * <ol>
 *   <li>Every queued task without an entry in {@code taskFutures} is either bound to the future the
 *       taskRunner already holds for it, or checked with {@code isReady} and handed to the taskRunner.
 *       Queued tasks that already have a future but are still pending are re-submitted to the
 *       taskRunner.</li>
 *   <li>Every task the taskRunner knows about that is no longer in the queue is shut down.</li>
 * </ol>
 */
@VisibleForTesting
void manageInternal() {
    // Index the taskRunner's known work items by task id so queued tasks can be matched to their futures.
    final Map<String, ListenableFuture<TaskStatus>> futuresFromRunner = new HashMap<>();
    for (final TaskRunnerWorkItem knownItem : taskRunner.getKnownTasks()) {
        futuresFromRunner.put(knownItem.getTaskId(), knownItem.getResult());
    }
    // Iterate over a snapshot of the tasks list, because notifyStatus may modify the live list.
    for (final Task task : ImmutableList.copyOf(tasks)) {
        if (taskFutures.containsKey(task.getId())) {
            // Already tracked: if the task is still pending, ask the taskRunner to run it again so it is
            // guaranteed to be assigned. See https://github.com/apache/druid/pull/6991
            if (isTaskPending(task)) {
                taskRunner.run(task);
            }
            continue;
        }
        final ListenableFuture<TaskStatus> futureForTask;
        if (futuresFromRunner.containsKey(task.getId())) {
            // The taskRunner already knows this task; reuse its future.
            futureForTask = futuresFromRunner.get(task.getId());
        } else {
            // The task should be running but the taskRunner does not know it yet.
            final boolean ready;
            try {
                ready = task.isReady(taskActionClientFactory.create(task));
            } catch (Exception e) {
                log.warn(e, "Exception thrown during isReady for task: %s", task.getId());
                // Only the lock-limit exception's own message is reported as-is; any other failure
                // gets a generic message.
                final String errorMessage = (e instanceof MaxAllowedLocksExceededException)
                    ? e.getMessage()
                    : "Failed while waiting for the task to be ready to run. " + "See overlord logs for more details.";
                notifyStatus(task, TaskStatus.failure(task.getId(), errorMessage), errorMessage);
                continue;
            }
            if (!ready) {
                // Task.isReady() can internally lock intervals or segments, so release them
                // when the task turns out not to be ready.
                taskLockbox.unlockAll(task);
                continue;
            }
            log.info("Asking taskRunner to run: %s", task.getId());
            futureForTask = taskRunner.run(task);
        }
        taskFutures.put(task.getId(), attachCallbacks(task, futureForTask));
    }
    // Shut down tasks that the taskRunner knows about but that are not in the queue.
    final Set<String> queuedTaskIds = tasks.stream().map(Task::getId).collect(Collectors.toSet());
    final Set<String> orphanedTaskIds = Sets.difference(futuresFromRunner.keySet(), queuedTaskIds);
    if (orphanedTaskIds.isEmpty()) {
        return;
    }
    log.info("Asking taskRunner to clean up %,d tasks.", orphanedTaskIds.size());
    for (final String orphanedTaskId : orphanedTaskIds) {
        try {
            taskRunner.shutdown(orphanedTaskId, "task is not in knownTaskIds[%s]", queuedTaskIds);
        } catch (Exception e) {
            // Best effort: a failed shutdown must not prevent the remaining tasks from being cleaned up.
            log.warn(e, "TaskRunner failed to clean up task: %s", orphanedTaskId);
        }
    }
}
Also used : Task(org.apache.druid.indexing.common.task.Task) HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) MaxAllowedLocksExceededException(org.apache.druid.indexing.common.task.batch.MaxAllowedLocksExceededException) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) TaskStatus(org.apache.druid.indexer.TaskStatus) EntryExistsException(org.apache.druid.metadata.EntryExistsException) MaxAllowedLocksExceededException(org.apache.druid.indexing.common.task.batch.MaxAllowedLocksExceededException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 7 with Task

use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.

the class HttpRemoteTaskRunner method scheduleCompletedTaskStatusCleanupFromZk.

/**
 * Schedules a recurring job on {@code zkCleanupExec} (first run after 1 minute, then every 5 minutes)
 * that deletes per-worker task status nodes under the ZK status path when the task id is not among
 * the active tasks reported by {@code taskStorage}.
 *
 * <p>Does nothing when no Curator client ({@code cf}) is configured.
 */
private void scheduleCompletedTaskStatusCleanupFromZk() {
    if (cf == null) {
        return;
    }
    zkCleanupExec.scheduleAtFixedRate(() -> {
        try {
            List<String> workerIds;
            try {
                workerIds = cf.getChildren().forPath(indexerZkConfig.getStatusPath());
            } catch (KeeperException.NoNodeException e) {
                // statusPath doesn't exist yet; can occur if no middleManagers have started.
                workerIds = ImmutableList.of();
            }
            if (workerIds.isEmpty()) {
                // Nothing to clean up, so there is no need to look up the active tasks.
                return;
            }
            final Set<String> activeTaskIds = new HashSet<>();
            for (Task activeTask : taskStorage.getActiveTasks()) {
                activeTaskIds.add(activeTask.getId());
            }
            for (String workerId : workerIds) {
                final String workerPath = JOINER.join(indexerZkConfig.getStatusPath(), workerId);
                List<String> statusNodeIds;
                try {
                    statusNodeIds = cf.getChildren().forPath(workerPath);
                } catch (KeeperException.NoNodeException e) {
                    // The worker's status node is missing; treat it as having no task statuses.
                    statusNodeIds = ImmutableList.of();
                }
                for (String statusNodeId : statusNodeIds) {
                    if (activeTaskIds.contains(statusNodeId)) {
                        // Still an active task; keep its status node.
                        continue;
                    }
                    final String statusNodePath = JOINER.join(workerPath, statusNodeId);
                    try {
                        cf.delete().guaranteed().forPath(statusNodePath);
                    } catch (KeeperException.NoNodeException e) {
                        // The node no longer exists; just log it.
                        log.info("Failed to delete taskStatusPath[%s].", statusNodePath);
                    }
                }
            }
        } catch (InterruptedException ex) {
            // Restore the interrupt flag for whoever owns this thread.
            Thread.currentThread().interrupt();
        } catch (Exception ex) {
            log.error(ex, "Unknown error while doing task status cleanup in ZK.");
        }
    }, 1, 5, TimeUnit.MINUTES);
}
Also used : Task(org.apache.druid.indexing.common.task.Task) KeeperException(org.apache.zookeeper.KeeperException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) HashSet(java.util.HashSet)

Example 8 with Task

use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.

the class SeekableStreamSupervisor method updatePartitionDataFromStream.

/**
 * Refreshes the supervisor's view of the stream's partitions.
 *
 * <p>Fetches the current partition ids from the record supplier, drops partitions that the stored
 * metadata marks as closed or expired, adds newly seen partitions to {@code partitionIds},
 * {@code partitionGroups} and {@code partitionOffsets}, and, when the partition set has changed while
 * tasks are actively reading, sets {@code earlyStopTime} so those tasks are asked to stop early.
 *
 * @return {@code true} if the partition data was refreshed; {@code false} if the partitions could not
 *         be fetched, the stream reported no partitions, or no active partitions remained after
 *         removing closed and previously expired ones
 */
private boolean updatePartitionDataFromStream() {
    // Snapshot the current partition ids so a change can be detected at the end.
    List<PartitionIdType> previousPartitionIds = new ArrayList<>(partitionIds);
    Set<PartitionIdType> partitionIdsFromSupplier;
    recordSupplierLock.lock();
    try {
        partitionIdsFromSupplier = recordSupplier.getPartitionIds(ioConfig.getStream());
        if (shouldSkipIgnorablePartitions()) {
            partitionIdsFromSupplier.removeAll(computeIgnorablePartitionIds());
        }
    } catch (Exception e) {
        stateManager.recordThrowableEvent(e);
        // Keep the warning short; the full stack trace is only logged at debug level.
        log.warn("Could not fetch partitions for topic/stream [%s]: %s", ioConfig.getStream(), e.getMessage());
        log.debug(e, "full stack trace");
        return false;
    } finally {
        recordSupplierLock.unlock();
    }
    if (partitionIdsFromSupplier == null || partitionIdsFromSupplier.size() == 0) {
        String errMsg = StringUtils.format("No partitions found for stream [%s]", ioConfig.getStream());
        stateManager.recordThrowableEvent(new StreamException(new ISE(errMsg)));
        log.warn(errMsg);
        return false;
    }
    log.debug("Found [%d] partitions for stream [%s]", partitionIdsFromSupplier.size(), ioConfig.getStream());
    // Classify partitions using the offsets stored in metadata: closed (end of shard) and
    // previously expired (expiration marker).
    Map<PartitionIdType, SequenceOffsetType> storedMetadata = getOffsetsFromMetadataStorage();
    Set<PartitionIdType> storedPartitions = storedMetadata.keySet();
    Set<PartitionIdType> closedPartitions = storedMetadata.entrySet().stream().filter(x -> isEndOfShard(x.getValue())).map(Entry::getKey).collect(Collectors.toSet());
    Set<PartitionIdType> previouslyExpiredPartitions = storedMetadata.entrySet().stream().filter(x -> isShardExpirationMarker(x.getValue())).map(Entry::getKey).collect(Collectors.toSet());
    Set<PartitionIdType> partitionIdsFromSupplierWithoutPreviouslyExpiredPartitions = Sets.difference(partitionIdsFromSupplier, previouslyExpiredPartitions);
    Set<PartitionIdType> activePartitionsIdsFromSupplier = Sets.difference(partitionIdsFromSupplierWithoutPreviouslyExpiredPartitions, closedPartitions);
    Set<PartitionIdType> newlyClosedPartitions = Sets.intersection(closedPartitions, new HashSet<>(previousPartitionIds));
    // Pass the set as a format argument (as the other log calls in this method do) instead of
    // concatenating, so it is not stringified at the call site on every run.
    log.debug("active partitions from supplier: %s", activePartitionsIdsFromSupplier);
    if (partitionIdsFromSupplierWithoutPreviouslyExpiredPartitions.size() != partitionIdsFromSupplier.size()) {
        // this should never happen, but we check for it and exclude the expired partitions if they somehow reappear
        log.warn("Previously expired partitions [%s] were present in the current list [%s] from the record supplier.", previouslyExpiredPartitions, partitionIdsFromSupplier);
    }
    if (activePartitionsIdsFromSupplier.size() == 0) {
        String errMsg = StringUtils.format("No active partitions found for stream [%s] after removing closed and previously expired partitions", ioConfig.getStream());
        stateManager.recordThrowableEvent(new StreamException(new ISE(errMsg)));
        log.warn(errMsg);
        return false;
    }
    // Partitions found on the very first discovery are not recorded as "subsequently discovered".
    boolean initialPartitionDiscovery = this.partitionIds.isEmpty();
    for (PartitionIdType partitionId : partitionIdsFromSupplierWithoutPreviouslyExpiredPartitions) {
        if (closedPartitions.contains(partitionId)) {
            log.info("partition [%s] is closed and has no more data, skipping.", partitionId);
            continue;
        }
        if (!this.partitionIds.contains(partitionId)) {
            partitionIds.add(partitionId);
            if (!initialPartitionDiscovery) {
                subsequentlyDiscoveredPartitions.add(partitionId);
            }
        }
    }
    // For streams that support partition expiration, clean up closed and expired partitions.
    // NOTE(review): the original comment here was truncated ("... partitions across tasks.");
    // presumably this step also rebalances partitions across tasks - confirm against
    // cleanupClosedAndExpiredPartitions.
    if (supportsPartitionExpiration()) {
        cleanupClosedAndExpiredPartitions(storedPartitions, newlyClosedPartitions, activePartitionsIdsFromSupplier, previouslyExpiredPartitions, partitionIdsFromSupplier);
    }
    // Assign every active partition to its task group and remember, per task group, the partitions
    // that had no known offset entry before this run.
    Int2ObjectMap<List<PartitionIdType>> newlyDiscovered = new Int2ObjectLinkedOpenHashMap<>();
    for (PartitionIdType partitionId : activePartitionsIdsFromSupplier) {
        int taskGroupId = getTaskGroupIdForPartition(partitionId);
        Set<PartitionIdType> partitionGroup = partitionGroups.computeIfAbsent(taskGroupId, k -> new HashSet<>());
        partitionGroup.add(partitionId);
        // putIfAbsent returns null only when the partition had no offset entry yet, i.e. it is new.
        if (partitionOffsets.putIfAbsent(partitionId, getNotSetMarker()) == null) {
            log.debug("New partition [%s] discovered for stream [%s], added to task group [%d]", partitionId, ioConfig.getStream(), taskGroupId);
            newlyDiscovered.computeIfAbsent(taskGroupId, k -> new ArrayList<>()).add(partitionId);
        }
    }
    if (newlyDiscovered.size() > 0) {
        // One info line per task group instead of one per partition.
        for (Int2ObjectMap.Entry<List<PartitionIdType>> taskGroupPartitions : newlyDiscovered.int2ObjectEntrySet()) {
            log.info("New partitions %s discovered for stream [%s], added to task group [%s]", taskGroupPartitions.getValue(), ioConfig.getStream(), taskGroupPartitions.getIntKey());
        }
    }
    if (!partitionIds.equals(previousPartitionIds)) {
        assignRecordSupplierToPartitionIds();
        // The partition set changed: if any actively reading task group has tasks, request an early
        // stop so that new tasks can be created for the changed partition set.
        for (TaskGroup taskGroup : activelyReadingTaskGroups.values()) {
            if (!taskGroup.taskIds().isEmpty()) {
                // Partitions have changed and we are managing active tasks - set an early publish time
                // at the current time + repartitionTransitionDuration.
                // This allows time for the stream to start writing to the new partitions after repartitioning.
                // For Kinesis ingestion, this cooldown time is particularly useful, lowering the possibility of
                // the new shards being empty, which can cause issues presently
                // (see https://github.com/apache/druid/issues/7600)
                earlyStopTime = DateTimes.nowUtc().plus(tuningConfig.getRepartitionTransitionDuration());
                log.info("Previous partition set [%s] has changed to [%s] - requesting that tasks stop after [%s] at [%s]", previousPartitionIds, partitionIds, tuningConfig.getRepartitionTransitionDuration(), earlyStopTime);
                break;
            }
        }
    }
    return true;
}
Also used : SeekableStreamIndexTask(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTask) Pair(org.apache.druid.java.util.common.Pair) DataSourceMetadata(org.apache.druid.indexing.overlord.DataSourceMetadata) TaskQueue(org.apache.druid.indexing.overlord.TaskQueue) Optional(com.google.common.base.Optional) TaskRunner(org.apache.druid.indexing.overlord.TaskRunner) Duration(java.time.Duration) Map(java.util.Map) IAE(org.apache.druid.java.util.common.IAE) Execs(org.apache.druid.java.util.common.concurrent.Execs) SeekableStreamDataSourceMetadata(org.apache.druid.indexing.seekablestream.SeekableStreamDataSourceMetadata) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) NotNull(javax.validation.constraints.NotNull) Int2ObjectLinkedOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap) LagStats(org.apache.druid.indexing.overlord.supervisor.autoscaler.LagStats) TaskState(org.apache.druid.indexer.TaskState) Stream(java.util.stream.Stream) Predicate(com.google.common.base.Predicate) RowIngestionMetersFactory(org.apache.druid.segment.incremental.RowIngestionMetersFactory) TaskMaster(org.apache.druid.indexing.overlord.TaskMaster) TaskStorage(org.apache.druid.indexing.overlord.TaskStorage) Joiner(com.google.common.base.Joiner) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) Iterables(com.google.common.collect.Iterables) SupervisorStateManager(org.apache.druid.indexing.overlord.supervisor.SupervisorStateManager) Callable(java.util.concurrent.Callable) TaskStatus(org.apache.druid.indexer.TaskStatus) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) EntryExistsException(org.apache.druid.metadata.EntryExistsException) SeekableStreamIndexTaskIOConfig(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskIOConfig) StringComparators(org.apache.druid.query.ordering.StringComparators) 
ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) BiConsumer(java.util.function.BiConsumer) SupervisorManager(org.apache.druid.indexing.overlord.supervisor.SupervisorManager) AutoScalerConfig(org.apache.druid.indexing.seekablestream.supervisor.autoscaler.AutoScalerConfig) RetryUtils(org.apache.druid.java.util.common.RetryUtils) SeekableStreamIndexTaskClientFactory(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskClientFactory) Nullable(javax.annotation.Nullable) SeekableStreamIndexTaskTuningConfig(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskTuningConfig) SeekableStreamIndexTaskClient(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskClient) BlockingDeque(java.util.concurrent.BlockingDeque) ServiceMetricEvent(org.apache.druid.java.util.emitter.service.ServiceMetricEvent) TaskLocation(org.apache.druid.indexer.TaskLocation) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) Futures(com.google.common.util.concurrent.Futures) TaskInfoProvider(org.apache.druid.indexing.common.TaskInfoProvider) TreeMap(java.util.TreeMap) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap) LinkedBlockingDeque(java.util.concurrent.LinkedBlockingDeque) SupervisorReport(org.apache.druid.indexing.overlord.supervisor.SupervisorReport) Preconditions(com.google.common.base.Preconditions) DataSchema(org.apache.druid.segment.indexing.DataSchema) SeekableStreamSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamSequenceNumbers) StreamPartition(org.apache.druid.indexing.seekablestream.common.StreamPartition) TimeoutException(java.util.concurrent.TimeoutException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Task(org.apache.druid.indexing.common.task.Task) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) DateTimes(org.apache.druid.java.util.common.DateTimes) 
Function(com.google.common.base.Function) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) StringUtils(org.apache.druid.java.util.common.StringUtils) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) OrderedSequenceNumber(org.apache.druid.indexing.seekablestream.common.OrderedSequenceNumber) StreamException(org.apache.druid.indexing.seekablestream.common.StreamException) List(java.util.List) MetadataSupervisorManager(org.apache.druid.metadata.MetadataSupervisorManager) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) IndexerMetadataStorageCoordinator(org.apache.druid.indexing.overlord.IndexerMetadataStorageCoordinator) Entry(java.util.Map.Entry) ByteEntity(org.apache.druid.data.input.impl.ByteEntity) SortedMap(java.util.SortedMap) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) RecordSupplier(org.apache.druid.indexing.seekablestream.common.RecordSupplier) HashMap(java.util.HashMap) HashSet(java.util.HashSet) MapperFeature(com.fasterxml.jackson.databind.MapperFeature) ImmutableList(com.google.common.collect.ImmutableList) IndexTaskClient(org.apache.druid.indexing.common.IndexTaskClient) TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) ExecutorService(java.util.concurrent.ExecutorService) ParseExceptionReport(org.apache.druid.segment.incremental.ParseExceptionReport) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) Iterator(java.util.Iterator) ReentrantLock(java.util.concurrent.locks.ReentrantLock) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) TimeUnit(java.util.concurrent.TimeUnit) 
TaskRunnerWorkItem(org.apache.druid.indexing.overlord.TaskRunnerWorkItem) VisibleForTesting(com.google.common.annotations.VisibleForTesting) DigestUtils(org.apache.commons.codec.digest.DigestUtils) Supervisor(org.apache.druid.indexing.overlord.supervisor.Supervisor) Comparator(java.util.Comparator) Collections(java.util.Collections) SeekableStreamIndexTaskRunner(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskRunner) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap) EntryExistsException(org.apache.druid.metadata.EntryExistsException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) TimeoutException(java.util.concurrent.TimeoutException) StreamException(org.apache.druid.indexing.seekablestream.common.StreamException) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) StreamException(org.apache.druid.indexing.seekablestream.common.StreamException) ISE(org.apache.druid.java.util.common.ISE) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Int2ObjectLinkedOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap)

Example 9 with Task

use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.

the class SeekableStreamSupervisor method discoverTasks.

/**
 * Discovers active tasks for this supervisor's datasource and brings the supervisor's bookkeeping
 * in line with them.
 *
 * <p>For every active task in {@code taskStorage} whose type matches this supervisor:
 * <ul>
 *   <li>if partition expiration is supported and the task reads partitions that are no longer in
 *       {@code partitionIds}, the task is stopped via {@code killTaskWithSuccess};</li>
 *   <li>otherwise, if the task is not yet tracked, its status is fetched asynchronously. A task in
 *       {@code PUBLISHING} state is added to the pending completion task groups and its end offsets
 *       are merged into {@code partitionOffsets}. Any other task is checked against the current
 *       partition allocation and supervisor spec, and is either stopped or added to
 *       {@code activelyReadingTaskGroups}.</li>
 * </ul>
 * Tasks whose status check produced no result are killed. Finally, the checkpoints of the task
 * groups touched in this pass are verified and merged.
 *
 * @throws ExecutionException   if waiting for the combined status future fails
 * @throws InterruptedException if interrupted while waiting for the combined status future
 * @throws TimeoutException     if the status checks do not finish within {@code futureTimeoutInSeconds}
 */
private void discoverTasks() throws ExecutionException, InterruptedException, TimeoutException {
    int taskCount = 0;
    // futureTaskIds and futures are kept index-aligned so a failed future can be traced back to its task.
    List<String> futureTaskIds = new ArrayList<>();
    List<ListenableFuture<Boolean>> futures = new ArrayList<>();
    List<Task> tasks = taskStorage.getActiveTasksByDatasource(dataSource);
    final Map<Integer, TaskGroup> taskGroupsToVerify = new HashMap<>();
    for (Task task : tasks) {
        if (!doesTaskTypeMatchSupervisor(task)) {
            continue;
        }
        taskCount++;
        @SuppressWarnings("unchecked") final SeekableStreamIndexTask<PartitionIdType, SequenceOffsetType, RecordType> seekableStreamIndexTask = (SeekableStreamIndexTask<PartitionIdType, SequenceOffsetType, RecordType>) task;
        final String taskId = task.getId();
        // If partition expiration is supported, stop tasks that read partitions which are no longer
        // tracked in partitionIds up front. This avoids having to map expired partitions which are
        // no longer tracked in partitionIds to a task group.
        if (supportsPartitionExpiration()) {
            Set<PartitionIdType> taskPartitions = seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().keySet();
            Set<PartitionIdType> inactivePartitionsInTask = Sets.difference(taskPartitions, new HashSet<>(partitionIds));
            if (!inactivePartitionsInTask.isEmpty()) {
                killTaskWithSuccess(taskId, "Task [%s] with partition set [%s] has inactive partitions [%s], stopping task.", taskId, taskPartitions, inactivePartitionsInTask);
                continue;
            }
        }
        // Determine which task group this task belongs to based on one of the partitions handled by this task. If we
        // later determine that this task is actively reading, we will make sure that it matches our current partition
        // allocation (getTaskGroupIdForPartition(partition) should return the same value for every partition being read
        // by this task) and kill it if it is not compatible. If the task is instead found to be in the publishing
        // state, we will permit it to complete even if it doesn't match our current partition allocation to support
        // seamless schema migration.
        Iterator<PartitionIdType> it = seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().keySet().iterator();
        final Integer taskGroupId = (it.hasNext() ? getTaskGroupIdForPartition(it.next()) : null);
        if (taskGroupId != null) {
            // check to see if we already know about this task, either in [activelyReadingTaskGroups] or in [pendingCompletionTaskGroups]
            // and if not add it to activelyReadingTaskGroups or pendingCompletionTaskGroups (if status = PUBLISHING)
            TaskGroup taskGroup = activelyReadingTaskGroups.get(taskGroupId);
            if (!isTaskInPendingCompletionGroups(taskId) && (taskGroup == null || !taskGroup.tasks.containsKey(taskId))) {
                futureTaskIds.add(taskId);
                futures.add(Futures.transform(taskClient.getStatusAsync(taskId), new Function<SeekableStreamIndexTaskRunner.Status, Boolean>() {

                    /**
                     * Handles the fetched task status.
                     *
                     * @return {@code true} if the task was registered, {@code false} if it was asked
                     *         to stop, or {@code null} if handling failed (the caller then kills the task)
                     */
                    @Override
                    public Boolean apply(SeekableStreamIndexTaskRunner.Status status) {
                        try {
                            log.debug("Task [%s], status [%s]", taskId, status);
                            if (status == SeekableStreamIndexTaskRunner.Status.PUBLISHING) {
                                seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().keySet().forEach(partition -> addDiscoveredTaskToPendingCompletionTaskGroups(getTaskGroupIdForPartition(partition), taskId, seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap()));
                                // update partitionGroups with the publishing task's sequences (if they are greater than what is
                                // existing) so that the next tasks will start reading from where this task left off
                                Map<PartitionIdType, SequenceOffsetType> publishingTaskEndOffsets = taskClient.getEndOffsets(taskId);
                                // If we received invalid endOffset values, we clear the known offset to refetch the last committed offset
                                // from metadata. If any endOffset values are invalid, we treat the entire set as invalid as a safety measure.
                                boolean endOffsetsAreInvalid = false;
                                for (Entry<PartitionIdType, SequenceOffsetType> entry : publishingTaskEndOffsets.entrySet()) {
                                    PartitionIdType partition = entry.getKey();
                                    SequenceOffsetType sequence = entry.getValue();
                                    if (sequence.equals(getEndOfPartitionMarker())) {
                                        // Arguments follow the placeholders: partition first, then task id.
                                        log.info("Got end of partition marker for partition [%s] from task [%s] in discoverTasks, clearing partition offset to refetch from metadata..", partition, taskId);
                                        endOffsetsAreInvalid = true;
                                        partitionOffsets.put(partition, getNotSetMarker());
                                    }
                                }
                                if (!endOffsetsAreInvalid) {
                                    for (Entry<PartitionIdType, SequenceOffsetType> entry : publishingTaskEndOffsets.entrySet()) {
                                        PartitionIdType partition = entry.getKey();
                                        SequenceOffsetType sequence = entry.getValue();
                                        // Compare-and-set loop: only move the stored offset forward, and retry
                                        // if the stored value changed between the read and the replace.
                                        boolean succeeded;
                                        do {
                                            succeeded = true;
                                            SequenceOffsetType previousOffset = partitionOffsets.putIfAbsent(partition, sequence);
                                            if (previousOffset != null && (makeSequenceNumber(previousOffset).compareTo(makeSequenceNumber(sequence))) < 0) {
                                                succeeded = partitionOffsets.replace(partition, previousOffset, sequence);
                                            }
                                        } while (!succeeded);
                                    }
                                }
                            } else {
                                // Not publishing: every partition of the task must map to the same task group
                                // under the current allocation, otherwise the task is stopped.
                                for (PartitionIdType partition : seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap().keySet()) {
                                    if (!taskGroupId.equals(getTaskGroupIdForPartition(partition))) {
                                        log.warn("Stopping task [%s] which does not match the expected partition allocation", taskId);
                                        try {
                                            stopTask(taskId, false).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
                                        } catch (InterruptedException | ExecutionException | TimeoutException e) {
                                            stateManager.recordThrowableEvent(e);
                                            log.warn(e, "Exception while stopping task");
                                        }
                                        return false;
                                    }
                                }
                                // Stop the task if isTaskCurrent rejects it; if it is current, create the
                                // corresponding taskGroup only if it does not exist yet.
                                if (!isTaskCurrent(taskGroupId, taskId)) {
                                    log.info("Stopping task [%s] which does not match the expected parameters and ingestion spec", taskId);
                                    try {
                                        stopTask(taskId, false).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
                                    } catch (InterruptedException | ExecutionException | TimeoutException e) {
                                        stateManager.recordThrowableEvent(e);
                                        log.warn(e, "Exception while stopping task");
                                    }
                                    return false;
                                } else {
                                    final TaskGroup taskGroup = activelyReadingTaskGroups.computeIfAbsent(taskGroupId, k -> {
                                        log.info("Creating a new task group for taskGroupId[%d]", taskGroupId);
                                        // NOTE(review): the first part of this comment is missing in this copy. The
                                        // TaskGroup is built from the discovered task's own IOConfig (start sequences,
                                        // message-time bounds, exclusive partitions, base sequence name), presumably
                                        // so that the replica segment allocations are the same - confirm.
                                        return new TaskGroup(taskGroupId, ImmutableMap.copyOf(seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getPartitionSequenceNumberMap()), null, seekableStreamIndexTask.getIOConfig().getMinimumMessageTime(), seekableStreamIndexTask.getIOConfig().getMaximumMessageTime(), seekableStreamIndexTask.getIOConfig().getStartSequenceNumbers().getExclusivePartitions(), seekableStreamIndexTask.getIOConfig().getBaseSequenceName());
                                    });
                                    taskGroupsToVerify.put(taskGroupId, taskGroup);
                                    final TaskData prevTaskData = taskGroup.tasks.putIfAbsent(taskId, new TaskData());
                                    if (prevTaskData != null) {
                                        // NOTE(review): the message labels prevTaskData as "taskGroup"; left
                                        // unchanged because the intended wording cannot be confirmed from here.
                                        throw new ISE("taskGroup[%s] already exists for new task[%s]", prevTaskData, taskId);
                                    }
                                    verifySameSequenceNameForAllTasksInGroup(taskGroupId);
                                }
                            }
                            return true;
                        } catch (Throwable t) {
                            stateManager.recordThrowableEvent(t);
                            log.error(t, "Something bad while discovering task [%s]", taskId);
                            // null marks this task as failed; it is killed after all futures are collected.
                            return null;
                        }
                    }
                }, workerExec));
            }
        }
    }
    // successfulAsList yields null for futures that failed, and apply() returns null on error;
    // in both cases the corresponding task is killed.
    List<Boolean> results = Futures.successfulAsList(futures).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
    for (int i = 0; i < results.size(); i++) {
        if (results.get(i) == null) {
            String taskId = futureTaskIds.get(i);
            killTask(taskId, "Task [%s] failed to return status, killing task", taskId);
        }
    }
    log.debug("Found [%d] seekablestream indexing tasks for dataSource [%s]", taskCount, dataSource);
    // make sure the checkpoints are consistent with each other and with the metadata store
    verifyAndMergeCheckpoints(taskGroupsToVerify.values());
}
Also used : SeekableStreamIndexTask(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTask) Task(org.apache.druid.indexing.common.task.Task) SeekableStreamIndexTask(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTask) Int2ObjectLinkedOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) Function(com.google.common.base.Function) ISE(org.apache.druid.java.util.common.ISE) ExecutionException(java.util.concurrent.ExecutionException) TimeoutException(java.util.concurrent.TimeoutException) TaskStatus(org.apache.druid.indexer.TaskStatus) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) SeekableStreamIndexTaskRunner(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskRunner)

Example 10 with Task

use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.

the class SegmentAllocateActionTest method testSameIntervalWithSegmentGranularity.

/**
 * Allocates two segments for the same task and timestamp with an hourly "Asia/Shanghai" segment
 * granularity, using two different sequence names, and expects both allocations to succeed.
 */
@Test
public void testSameIntervalWithSegmentGranularity() {
    // Register a no-op task with the lockbox so it can take part in allocation.
    final Task noopTask = NoopTask.create();
    taskActionTestKit.getTaskLockbox().add(noopTask);

    // One-hour periods evaluated in the Asia/Shanghai time zone.
    final Granularity hourlyInShanghai = new PeriodGranularity(
        Period.hours(1),
        null,
        DateTimes.inferTzFromString("Asia/Shanghai")
    );

    // Same timestamp and granularities, different sequence names.
    final SegmentIdWithShardSpec firstAllocation = allocate(noopTask, PARTY_TIME, Granularities.MINUTE, hourlyInShanghai, "s1", null);
    final SegmentIdWithShardSpec secondAllocation = allocate(noopTask, PARTY_TIME, Granularities.MINUTE, hourlyInShanghai, "s2", null);

    Assert.assertNotNull(firstAllocation);
    Assert.assertNotNull(secondAllocation);
}
Also used : Task(org.apache.druid.indexing.common.task.Task) NoopTask(org.apache.druid.indexing.common.task.NoopTask) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) Granularity(org.apache.druid.java.util.common.granularity.Granularity) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) LockGranularity(org.apache.druid.indexing.common.LockGranularity) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) Test(org.junit.Test)

Aggregations

Task (org.apache.druid.indexing.common.task.Task)383 Test (org.junit.Test)307 NoopTask (org.apache.druid.indexing.common.task.NoopTask)177 HashMap (java.util.HashMap)132 Map (java.util.Map)132 RealtimeIndexTask (org.apache.druid.indexing.common.task.RealtimeIndexTask)120 ArrayList (java.util.ArrayList)114 ImmutableMap (com.google.common.collect.ImmutableMap)104 TreeMap (java.util.TreeMap)100 TaskStatus (org.apache.druid.indexer.TaskStatus)100 TaskRunnerListener (org.apache.druid.indexing.overlord.TaskRunnerListener)98 Executor (java.util.concurrent.Executor)86 List (java.util.List)78 AbstractTask (org.apache.druid.indexing.common.task.AbstractTask)78 Collection (java.util.Collection)70 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)68 TaskLocation (org.apache.druid.indexer.TaskLocation)62 TaskLock (org.apache.druid.indexing.common.TaskLock)60 ImmutableList (com.google.common.collect.ImmutableList)58 ISE (org.apache.druid.java.util.common.ISE)58