Search in sources:

Example 1 with Task

Use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.

Class ParallelIndexSupervisorTask, method runHashPartitionMultiPhaseParallel:

@VisibleForTesting
TaskStatus runHashPartitionMultiPhaseParallel(TaskToolbox toolbox) throws Exception {
    TaskState state;
    ParallelIndexIngestionSpec ingestionSchemaToUse = ingestionSchema;
    if (!(ingestionSchema.getTuningConfig().getPartitionsSpec() instanceof HashedPartitionsSpec)) {
        // only range and hash partitioning are supported for multi-phase parallel ingestion; see runMultiPhaseParallel()
        throw new ISE("forceGuaranteedRollup is set but partitionsSpec [%s] is not a single_dim or hash partition spec.", ingestionSchema.getTuningConfig().getPartitionsSpec());
    }
    final Map<Interval, Integer> intervalToNumShards;
    HashedPartitionsSpec partitionsSpec = (HashedPartitionsSpec) ingestionSchema.getTuningConfig().getPartitionsSpec();
    final boolean needsInputSampling = partitionsSpec.getNumShards() == null || ingestionSchemaToUse.getDataSchema().getGranularitySpec().inputIntervals().isEmpty();
    if (needsInputSampling) {
        // 0. need to determine intervals and numShards by scanning the data
        LOG.info("Needs to determine intervals or numShards, beginning %s phase.", PartialDimensionCardinalityTask.TYPE);
        ParallelIndexTaskRunner<PartialDimensionCardinalityTask, DimensionCardinalityReport> cardinalityRunner = createRunner(toolbox, this::createPartialDimensionCardinalityRunner);
        state = runNextPhase(cardinalityRunner);
        if (state.isFailure()) {
            String errMsg = StringUtils.format(TASK_PHASE_FAILURE_MSG, cardinalityRunner.getName());
            return TaskStatus.failure(getId(), errMsg);
        }
        if (cardinalityRunner.getReports().isEmpty()) {
            String msg = "No valid rows for hash partitioning. All rows may have invalid timestamps or have been filtered out.";
            LOG.warn(msg);
            return TaskStatus.success(getId(), msg);
        }
        if (partitionsSpec.getNumShards() == null) {
            int effectiveMaxRowsPerSegment = partitionsSpec.getMaxRowsPerSegment() == null ? PartitionsSpec.DEFAULT_MAX_ROWS_PER_SEGMENT : partitionsSpec.getMaxRowsPerSegment();
            LOG.info("effective maxRowsPerSegment is: " + effectiveMaxRowsPerSegment);
            intervalToNumShards = determineNumShardsFromCardinalityReport(cardinalityRunner.getReports().values(), effectiveMaxRowsPerSegment);
        } else {
            intervalToNumShards = CollectionUtils.mapValues(mergeCardinalityReports(cardinalityRunner.getReports().values()), k -> partitionsSpec.getNumShards());
        }
        ingestionSchemaToUse = rewriteIngestionSpecWithIntervalsIfMissing(ingestionSchemaToUse, intervalToNumShards.keySet());
    } else {
        // numShards will be determined in PartialHashSegmentGenerateTask
        intervalToNumShards = null;
    }
    // 1. Partial segment generation phase
    final ParallelIndexIngestionSpec segmentCreateIngestionSpec = ingestionSchemaToUse;
    ParallelIndexTaskRunner<PartialHashSegmentGenerateTask, GeneratedPartitionsReport> indexingRunner = createRunner(toolbox, f -> createPartialHashSegmentGenerateRunner(toolbox, segmentCreateIngestionSpec, intervalToNumShards));
    state = runNextPhase(indexingRunner);
    if (state.isFailure()) {
        String errMsg = StringUtils.format(TASK_PHASE_FAILURE_MSG, indexingRunner.getName());
        return TaskStatus.failure(getId(), errMsg);
    }
    // 2. Partial segment merge phase
    // partition (interval, partitionId) -> partition locations
    Map<Partition, List<PartitionLocation>> partitionToLocations = getPartitionToLocations(indexingRunner.getReports());
    final List<PartialSegmentMergeIOConfig> ioConfigs = createGenericMergeIOConfigs(ingestionSchema.getTuningConfig().getTotalNumMergeTasks(), partitionToLocations);
    final ParallelIndexIngestionSpec segmentMergeIngestionSpec = ingestionSchemaToUse;
    final ParallelIndexTaskRunner<PartialGenericSegmentMergeTask, PushedSegmentsReport> mergeRunner = createRunner(toolbox, tb -> createPartialGenericSegmentMergeRunner(tb, ioConfigs, segmentMergeIngestionSpec));
    state = runNextPhase(mergeRunner);
    TaskStatus taskStatus;
    if (state.isSuccess()) {
        // noinspection ConstantConditions
        publishSegments(toolbox, mergeRunner.getReports());
        if (awaitSegmentAvailabilityTimeoutMillis > 0) {
            waitForSegmentAvailability(mergeRunner.getReports());
        }
        taskStatus = TaskStatus.success(getId());
    } else {
        // There is only success or failure after running.
        Preconditions.checkState(state.isFailure(), "Unrecognized state after task is complete[%s]", state);
        String errMsg = StringUtils.format(TASK_PHASE_FAILURE_MSG, mergeRunner.getName());
        taskStatus = TaskStatus.failure(getId(), errMsg);
    }
    toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports(taskStatus, segmentAvailabilityConfirmationCompleted));
    return taskStatus;
}
Also used: HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) ISE(org.apache.druid.java.util.common.ISE) ArrayList(java.util.ArrayList) List(java.util.List) TaskStatus(org.apache.druid.indexer.TaskStatus) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TaskState(org.apache.druid.indexer.TaskState) Interval(org.joda.time.Interval) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
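
The pivotal step above is turning per-interval cardinality estimates into shard counts. The real implementation, determineNumShardsFromCardinalityReport, merges the HllSketch-backed DimensionCardinalityReport values before doing the division; the following standalone sketch shows just the arithmetic, using String interval keys and a hypothetical determineNumShards helper in place of the real method, and assuming the default maxRowsPerSegment (5,000,000 at the time of writing).

import java.util.HashMap;
import java.util.Map;

public final class ShardCountSketch {

    // Hypothetical stand-in for determineNumShardsFromCardinalityReport:
    // pick enough shards per interval that the estimated row count per
    // shard stays at or below maxRowsPerSegment.
    static Map<String, Integer> determineNumShards(
        Map<String, Long> intervalToCardinality,
        int maxRowsPerSegment
    ) {
        final Map<String, Integer> intervalToNumShards = new HashMap<>();
        for (Map.Entry<String, Long> entry : intervalToCardinality.entrySet()) {
            // Ceiling division, so a partial remainder still gets its own shard.
            final long numShards = (entry.getValue() + maxRowsPerSegment - 1) / maxRowsPerSegment;
            intervalToNumShards.put(entry.getKey(), (int) Math.max(1, numShards));
        }
        return intervalToNumShards;
    }

    public static void main(String[] args) {
        final Map<String, Long> cardinalities = new HashMap<>();
        cardinalities.put("2021-01-01/2021-01-02", 12_000_000L);
        cardinalities.put("2021-01-02/2021-01-03", 3_000_000L);
        // With the 5,000,000 default, the first interval gets 3 shards, the second 1.
        System.out.println(determineNumShards(cardinalities, 5_000_000));
    }
}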

Example 2 with Task

Use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.

Class SingleTaskBackgroundRunner, method stop:

@Override
@LifecycleStop
public void stop() {
    stopping = true;
    if (executorService != null) {
        try {
            executorService.shutdown();
        } catch (SecurityException ex) {
            log.error(ex, "I can't control my own threads!");
        }
    }
    if (runningItem != null) {
        final Task task = runningItem.getTask();
        final long start = System.currentTimeMillis();
        final long elapsed;
        boolean error = false;
        // Call stopGracefully so the task can release its resources.
        log.info("Starting graceful shutdown of task[%s].", task.getId());
        task.stopGracefully(taskConfig);
        if (taskConfig.isRestoreTasksOnRestart() && task.canRestore()) {
            try {
                final TaskStatus taskStatus = runningItem.getResult().get(new Interval(DateTimes.utc(start), taskConfig.getGracefulShutdownTimeout()).toDurationMillis(), TimeUnit.MILLISECONDS);
                // Ignore status, it doesn't matter for graceful shutdowns.
                log.info("Graceful shutdown of task[%s] finished in %,dms.", task.getId(), System.currentTimeMillis() - start);
                TaskRunnerUtils.notifyStatusChanged(listeners, task.getId(), taskStatus);
            } catch (Exception e) {
                log.makeAlert(e, "Graceful task shutdown failed: %s", task.getDataSource()).addData("taskId", task.getId()).addData("dataSource", task.getDataSource()).emit();
                log.warn(e, "Graceful shutdown of task[%s] aborted with exception.", task.getId());
                error = true;
                // Creating a new status to only feed listeners seems quite strange.
                // This is currently OK because we have no listeners yet registered in peon.
                // However, we should fix this in the near future by always retrieving task status
                // from one single source of truth that is also propagated to the overlord.
                // See https://github.com/apache/druid/issues/11445.
                TaskRunnerUtils.notifyStatusChanged(listeners, task.getId(), TaskStatus.failure(task.getId(), "Failed to stop gracefully with exception. See task logs for more details."));
            }
        } else {
            // Creating a new status to only feed listeners seems quite strange.
            // This is currently OK because we have no listeners yet registered in peon.
            // However, we should fix this in the near future by always retrieving task status
            // from one single source of truth that is also propagated to the overlord.
            // See https://github.com/apache/druid/issues/11445.
            TaskRunnerUtils.notifyStatusChanged(listeners, task.getId(), TaskStatus.failure(task.getId(), "Canceled as task execution process stopped"));
        }
        elapsed = System.currentTimeMillis() - start;
        final ServiceMetricEvent.Builder metricBuilder = ServiceMetricEvent.builder()
                .setDimension("task", task.getId())
                .setDimension("dataSource", task.getDataSource())
                .setDimension("graceful", "true") // for backward compatibility
                .setDimension("error", String.valueOf(error));
        emitter.emit(metricBuilder.build("task/interrupt/count", 1L));
        emitter.emit(metricBuilder.build("task/interrupt/elapsed", elapsed));
    }
    // Ok, now interrupt everything.
    if (executorService != null) {
        try {
            executorService.shutdownNow();
        } catch (SecurityException ex) {
            log.error(ex, "I can't control my own threads!");
        }
    }
}
Also used: Task(org.apache.druid.indexing.common.task.Task) ServiceMetricEvent(org.apache.druid.java.util.emitter.service.ServiceMetricEvent) TaskStatus(org.apache.druid.indexer.TaskStatus) Interval(org.joda.time.Interval) LifecycleStop(org.apache.druid.java.util.common.lifecycle.LifecycleStop)
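
The shutdown choreography here is a general pattern worth isolating: shutdown() so no new work is accepted, a bounded wait on the running task's Future, then shutdownNow() to interrupt whatever remains. A minimal standalone sketch of that pattern follows; the executor, timeout, and status string are illustrative, not Druid APIs.

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public final class GracefulStopSketch {

    public static void main(String[] args) throws Exception {
        final ExecutorService exec = Executors.newSingleThreadExecutor();
        final Future<String> result = exec.submit(() -> {
            Thread.sleep(200); // simulated task work
            return "SUCCESS";
        });

        exec.shutdown(); // stop accepting new work; let the running task finish
        try {
            // Bound the wait, just as getGracefulShutdownTimeout() bounds the get() above.
            System.out.println("Final status: " + result.get(1, TimeUnit.SECONDS));
        } catch (TimeoutException e) {
            exec.shutdownNow(); // escalate: interrupt anything still running
            System.out.println("Timed out waiting; interrupted the task.");
        }
    }
}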

Example 3 with Task

Use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.

Class SingleTaskBackgroundRunner, method getQueryRunnerImpl:

private <T> QueryRunner<T> getQueryRunnerImpl(Query<T> query) {
    QueryRunner<T> queryRunner = null;
    if (runningItem != null) {
        final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
        final Task task = runningItem.getTask();
        if (analysis.getBaseTableDataSource().isPresent() && task.getDataSource().equals(analysis.getBaseTableDataSource().get().getName())) {
            final QueryRunner<T> taskQueryRunner = task.getQueryRunner(query);
            if (taskQueryRunner != null) {
                queryRunner = taskQueryRunner;
            }
        }
    }
    return new SetAndVerifyContextQueryRunner<>(serverConfig, queryRunner == null ? new NoopQueryRunner<>() : queryRunner);
}
Also used: Task(org.apache.druid.indexing.common.task.Task) SetAndVerifyContextQueryRunner(org.apache.druid.server.SetAndVerifyContextQueryRunner) NoopQueryRunner(org.apache.druid.query.NoopQueryRunner) DataSourceAnalysis(org.apache.druid.query.planning.DataSourceAnalysis)
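
Note that the method never returns null: when no suitable runner exists it falls back to a NoopQueryRunner, and the result is wrapped either way so the query context gets verified. A tiny sketch of that null-object fallback, with a hypothetical Runner interface standing in for QueryRunner:

import java.util.Collections;
import java.util.List;

public final class FallbackRunnerSketch {

    interface Runner {
        List<String> run(String query);
    }

    // Null object: a runner that always yields an empty result set.
    static final Runner NOOP = query -> Collections.emptyList();

    static Runner choose(Runner candidate) {
        // Mirrors the snippet: prefer the task's runner when one exists,
        // otherwise hand back the no-op so callers never have to null-check.
        return candidate != null ? candidate : NOOP;
    }

    public static void main(String[] args) {
        System.out.println(choose(null).run("SELECT 1")); // prints []
    }
}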

Example 4 with Task

Use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.

Class TaskLockbox, method syncFromStorage:

/**
 * Wipe out our current in-memory state and resync it from our bundled {@link TaskStorage}.
 */
public void syncFromStorage() {
    giant.lock();
    try {
        // Load stuff from taskStorage first. If this fails, we don't want to lose all our locks.
        final Set<String> storedActiveTasks = new HashSet<>();
        final List<Pair<Task, TaskLock>> storedLocks = new ArrayList<>();
        for (final Task task : taskStorage.getActiveTasks()) {
            storedActiveTasks.add(task.getId());
            for (final TaskLock taskLock : taskStorage.getLocks(task.getId())) {
                storedLocks.add(Pair.of(task, taskLock));
            }
        }
        // Sort locks by version, so we add them back in the order they were acquired.
        final Ordering<Pair<Task, TaskLock>> byVersionOrdering = new Ordering<Pair<Task, TaskLock>>() {

            @Override
            public int compare(Pair<Task, TaskLock> left, Pair<Task, TaskLock> right) {
                // The second comparison breaks ties between locks with identical versions.
                return ComparisonChain.start().compare(left.rhs.getVersion(), right.rhs.getVersion()).compare(left.lhs.getId(), right.lhs.getId()).result();
            }
        };
        running.clear();
        activeTasks.clear();
        activeTasks.addAll(storedActiveTasks);
        // Bookkeeping for a log message at the end
        int taskLockCount = 0;
        for (final Pair<Task, TaskLock> taskAndLock : byVersionOrdering.sortedCopy(storedLocks)) {
            final Task task = Preconditions.checkNotNull(taskAndLock.lhs, "task");
            final TaskLock savedTaskLock = Preconditions.checkNotNull(taskAndLock.rhs, "savedTaskLock");
            if (savedTaskLock.getInterval().toDurationMillis() <= 0) {
                // "Impossible", but you never know what crazy stuff can be restored from storage.
                log.warn("Ignoring lock[%s] with empty interval for task: %s", savedTaskLock, task.getId());
                continue;
            }
            // Create a new taskLock if it doesn't have a proper priority,
            // so that every taskLock in memory has the priority.
            final TaskLock savedTaskLockWithPriority = savedTaskLock.getPriority() == null ? savedTaskLock.withPriority(task.getPriority()) : savedTaskLock;
            final TaskLockPosse taskLockPosse = verifyAndCreateOrFindLockPosse(task, savedTaskLockWithPriority);
            if (taskLockPosse != null) {
                taskLockPosse.addTask(task);
                final TaskLock taskLock = taskLockPosse.getTaskLock();
                if (savedTaskLockWithPriority.getVersion().equals(taskLock.getVersion())) {
                    taskLockCount++;
                    log.info("Reacquired lock[%s] for task: %s", taskLock, task.getId());
                } else {
                    taskLockCount++;
                    log.info("Could not reacquire lock on interval[%s] version[%s] (got version[%s] instead) for task: %s", savedTaskLockWithPriority.getInterval(), savedTaskLockWithPriority.getVersion(), taskLock.getVersion(), task.getId());
                }
            } else {
                throw new ISE("Could not reacquire lock on interval[%s] version[%s] for task: %s", savedTaskLockWithPriority.getInterval(), savedTaskLockWithPriority.getVersion(), task.getId());
            }
        }
        log.info("Synced %,d locks for %,d activeTasks from storage (%,d locks ignored).", taskLockCount, activeTasks.size(), storedLocks.size() - taskLockCount);
    } finally {
        giant.unlock();
    }
}
Also used: Task(org.apache.druid.indexing.common.task.Task) ArrayList(java.util.ArrayList) TaskLock(org.apache.druid.indexing.common.TaskLock) Ordering(com.google.common.collect.Ordering) ISE(org.apache.druid.java.util.common.ISE) HashSet(java.util.HashSet) Pair(org.apache.druid.java.util.common.Pair)
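
The anonymous Ordering subclass predates java.util.Comparator chaining; the same version-first, task-id-tiebreaker ordering can be expressed more compactly today. A standalone sketch, with TaskLockPair as a hypothetical stand-in for Pair<Task, TaskLock>:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public final class LockOrderingSketch {

    static final class TaskLockPair {
        final String taskId;
        final String version;

        TaskLockPair(String taskId, String version) {
            this.taskId = taskId;
            this.version = version;
        }
    }

    public static void main(String[] args) {
        final List<TaskLockPair> locks = new ArrayList<>();
        locks.add(new TaskLockPair("task-b", "2021-01-02T00:00:00.000Z"));
        locks.add(new TaskLockPair("task-b", "2021-01-01T00:00:00.000Z"));
        locks.add(new TaskLockPair("task-a", "2021-01-02T00:00:00.000Z"));

        // Same ordering as the ComparisonChain: version first, task id as tiebreaker.
        locks.sort(
            Comparator.comparing((TaskLockPair p) -> p.version)
                      .thenComparing((TaskLockPair p) -> p.taskId)
        );

        locks.forEach(p -> System.out.println(p.version + " held by " + p.taskId));
    }
}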

Example 5 with Task

Use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.

Class TaskLockbox, method revokeLock:

/**
 * Mark the lock as revoked. Note that revoked locks are NOT removed. Instead, they are maintained in {@link #running}
 * and {@link #taskStorage} just like normal locks, so that a lock can be checked for revocation when it is requested
 * again and callers can be notified that it has been revoked. Revoked locks are removed by calling
 * {@link #unlock(Task, Interval)}.
 *
 * @param taskId an id of the task holding the lock
 * @param lock   lock to be revoked
 */
@VisibleForTesting
protected void revokeLock(String taskId, TaskLock lock) {
    giant.lock();
    try {
        if (!activeTasks.contains(taskId)) {
            throw new ISE("Cannot revoke lock for inactive task[%s]", taskId);
        }
        final Task task = taskStorage.getTask(taskId).orNull();
        if (task == null) {
            throw new ISE("Cannot revoke lock for unknown task[%s]", taskId);
        }
        log.info("Revoking task lock[%s] for task[%s]", lock, taskId);
        if (lock.isRevoked()) {
            log.warn("TaskLock[%s] is already revoked", lock);
        } else {
            final TaskLock revokedLock = lock.revokedCopy();
            taskStorage.replaceLock(taskId, lock, revokedLock);
            final List<TaskLockPosse> possesHolder = running.get(task.getDataSource()).get(lock.getInterval().getStart()).get(lock.getInterval());
            final TaskLockPosse foundPosse = possesHolder.stream().filter(posse -> posse.getTaskLock().equals(lock)).findFirst().orElseThrow(() -> new ISE("Failed to find lock posse for lock[%s]", lock));
            possesHolder.remove(foundPosse);
            possesHolder.add(foundPosse.withTaskLock(revokedLock));
            log.info("Revoked taskLock[%s]", lock);
        }
    } finally {
        giant.unlock();
    }
}
Also used: Task(org.apache.druid.indexing.common.task.Task) TaskLock(org.apache.druid.indexing.common.TaskLock) ISE(org.apache.druid.java.util.common.ISE) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
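
Worth noting is the update style: TaskLock is immutable, so revocation swaps in a revokedCopy() rather than mutating the existing lock, first in storage and then in the in-memory posse. A minimal sketch of that copy-on-write idiom; the Lock class below is illustrative, not Druid's TaskLock:

public final class RevokeSketch {

    // Immutable lock: "revoking" produces a new instance instead of mutating.
    static final class Lock {
        final String version;
        final boolean revoked;

        Lock(String version, boolean revoked) {
            this.version = version;
            this.revoked = revoked;
        }

        Lock revokedCopy() {
            return new Lock(version, true);
        }
    }

    public static void main(String[] args) {
        final Lock original = new Lock("2021-01-01T00:00:00.000Z", false);
        final Lock revoked = original.revokedCopy();
        // The original is untouched; readers only observe revocation once the
        // replacement is swapped in, keeping storage and memory consistent.
        System.out.println(original.revoked + " -> " + revoked.revoked);
    }
}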

Aggregations

Task (org.apache.druid.indexing.common.task.Task): 191
Test (org.junit.Test): 153
NoopTask (org.apache.druid.indexing.common.task.NoopTask): 88
HashMap (java.util.HashMap): 66
Map (java.util.Map): 66
RealtimeIndexTask (org.apache.druid.indexing.common.task.RealtimeIndexTask): 60
ArrayList (java.util.ArrayList): 57
ImmutableMap (com.google.common.collect.ImmutableMap): 52
TreeMap (java.util.TreeMap): 50
TaskStatus (org.apache.druid.indexer.TaskStatus): 50
TaskRunnerListener (org.apache.druid.indexing.overlord.TaskRunnerListener): 49
Executor (java.util.concurrent.Executor): 43
List (java.util.List): 39
AbstractTask (org.apache.druid.indexing.common.task.AbstractTask): 39
Collection (java.util.Collection): 35
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 34
TaskLocation (org.apache.druid.indexer.TaskLocation): 31
TaskLock (org.apache.druid.indexing.common.TaskLock): 30
ImmutableList (com.google.common.collect.ImmutableList): 29
ISE (org.apache.druid.java.util.common.ISE): 29