use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.
the class ParallelIndexSupervisorTask method runHashPartitionMultiPhaseParallel.
/**
 * Runs multi-phase parallel ingestion using hash partitioning.
 *
 * <p>Phases, in order:
 * <ol>
 *   <li>Optional dimension-cardinality phase, run only when numShards is not configured or no input
 *       intervals are configured. Its reports are used to derive the interval-to-numShards map and to fill
 *       in missing intervals in the ingestion spec.</li>
 *   <li>Partial segment generation.</li>
 *   <li>Partial segment merge, followed by publishing (and optionally waiting for availability) on success.</li>
 * </ol>
 *
 * <p>The task completion report is written only once the merge phase has run; the early returns
 * (failed cardinality/generation phase, or no cardinality reports) return before that point.
 *
 * @param toolbox toolbox passed to the phase runners and used for publishing and report writing
 * @return a failure status if any phase fails; otherwise a success status (also returned, with a message,
 *         when the cardinality phase yields no reports)
 * @throws ISE       if the configured partitionsSpec is not a {@link HashedPartitionsSpec}
 * @throws Exception whatever the phase runners, publishing or report writing throw
 */
@VisibleForTesting
TaskStatus runHashPartitionMultiPhaseParallel(TaskToolbox toolbox) throws Exception {
  // Multi-phase parallel ingestion supports range and hash partitioning only (see runMultiPhaseParallel());
  // this method handles the hash case.
  if (!(ingestionSchema.getTuningConfig().getPartitionsSpec() instanceof HashedPartitionsSpec)) {
    throw new ISE(
        "forceGuaranteedRollup is set but partitionsSpec [%s] is not a single_dim or hash partition spec.",
        ingestionSchema.getTuningConfig().getPartitionsSpec()
    );
  }
  final HashedPartitionsSpec hashedSpec =
      (HashedPartitionsSpec) ingestionSchema.getTuningConfig().getPartitionsSpec();

  ParallelIndexIngestionSpec effectiveSpec = ingestionSchema;
  TaskState phaseState;
  final Map<Interval, Integer> intervalToNumShards;

  // Sampling can be skipped only when both numShards and the input intervals are already known.
  final boolean canSkipSampling =
      hashedSpec.getNumShards() != null
      && !effectiveSpec.getDataSchema().getGranularitySpec().inputIntervals().isEmpty();

  if (canSkipSampling) {
    // numShards will be determined in PartialHashSegmentGenerateTask
    intervalToNumShards = null;
  } else {
    // Phase 0: scan the data to determine intervals and/or numShards.
    LOG.info("Needs to determine intervals or numShards, beginning %s phase.", PartialDimensionCardinalityTask.TYPE);
    final ParallelIndexTaskRunner<PartialDimensionCardinalityTask, DimensionCardinalityReport> cardinalityRunner =
        createRunner(toolbox, this::createPartialDimensionCardinalityRunner);

    phaseState = runNextPhase(cardinalityRunner);
    if (phaseState.isFailure()) {
      return TaskStatus.failure(
          getId(),
          StringUtils.format(TASK_PHASE_FAILURE_MSG, cardinalityRunner.getName())
      );
    }

    if (cardinalityRunner.getReports().isEmpty()) {
      // Nothing to ingest is reported as success, with the reason both logged and attached to the status.
      final String noRowsMsg =
          "No valid rows for hash partitioning. All rows may have invalid timestamps or have been filtered out.";
      LOG.warn(noRowsMsg);
      return TaskStatus.success(getId(), noRowsMsg);
    }

    if (hashedSpec.getNumShards() != null) {
      // numShards is fixed by configuration; the reports only supply the set of intervals.
      intervalToNumShards = CollectionUtils.mapValues(
          mergeCardinalityReports(cardinalityRunner.getReports().values()),
          unused -> hashedSpec.getNumShards()
      );
    } else {
      final int effectiveMaxRowsPerSegment;
      if (hashedSpec.getMaxRowsPerSegment() == null) {
        effectiveMaxRowsPerSegment = PartitionsSpec.DEFAULT_MAX_ROWS_PER_SEGMENT;
      } else {
        effectiveMaxRowsPerSegment = hashedSpec.getMaxRowsPerSegment();
      }
      LOG.info("effective maxRowsPerSegment is: " + effectiveMaxRowsPerSegment);
      intervalToNumShards = determineNumShardsFromCardinalityReport(
          cardinalityRunner.getReports().values(),
          effectiveMaxRowsPerSegment
      );
    }

    effectiveSpec = rewriteIngestionSpecWithIntervalsIfMissing(effectiveSpec, intervalToNumShards.keySet());
  }

  // The spec is final from here on; a final copy is needed for capture by the lambdas below.
  final ParallelIndexIngestionSpec resolvedSpec = effectiveSpec;

  // Phase 1: partial segment generation.
  final ParallelIndexTaskRunner<PartialHashSegmentGenerateTask, GeneratedPartitionsReport> generateRunner =
      createRunner(
          toolbox,
          ignoredToolbox -> createPartialHashSegmentGenerateRunner(toolbox, resolvedSpec, intervalToNumShards)
      );
  phaseState = runNextPhase(generateRunner);
  if (phaseState.isFailure()) {
    return TaskStatus.failure(
        getId(),
        StringUtils.format(TASK_PHASE_FAILURE_MSG, generateRunner.getName())
    );
  }

  // Phase 2: partial segment merge.
  // Maps partition (interval, partitionId) -> partition locations.
  final Map<Partition, List<PartitionLocation>> locationsByPartition =
      getPartitionToLocations(generateRunner.getReports());
  final List<PartialSegmentMergeIOConfig> mergeIoConfigs = createGenericMergeIOConfigs(
      ingestionSchema.getTuningConfig().getTotalNumMergeTasks(),
      locationsByPartition
  );
  final ParallelIndexTaskRunner<PartialGenericSegmentMergeTask, PushedSegmentsReport> mergeRunner =
      createRunner(
          toolbox,
          tb -> createPartialGenericSegmentMergeRunner(tb, mergeIoConfigs, resolvedSpec)
      );
  phaseState = runNextPhase(mergeRunner);

  final TaskStatus finalStatus;
  if (!phaseState.isSuccess()) {
    // After a phase has run, the state is expected to be either success or failure.
    Preconditions.checkState(phaseState.isFailure(), "Unrecognized state after task is complete[%s]", phaseState);
    finalStatus = TaskStatus.failure(
        getId(),
        StringUtils.format(TASK_PHASE_FAILURE_MSG, mergeRunner.getName())
    );
  } else {
    // noinspection ConstantConditions
    publishSegments(toolbox, mergeRunner.getReports());
    if (awaitSegmentAvailabilityTimeoutMillis > 0) {
      waitForSegmentAvailability(mergeRunner.getReports());
    }
    finalStatus = TaskStatus.success(getId());
  }

  toolbox.getTaskReportFileWriter().write(
      getId(),
      getTaskCompletionReports(finalStatus, segmentAvailabilityConfirmationCompleted)
  );
  return finalStatus;
}
use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.
the class SingleTaskBackgroundRunner method stop.
/**
 * Stops this runner.
 *
 * <p>Sequence: mark the runner as stopping, request an orderly executor shutdown, ask the running task
 * (if any) to stop gracefully, notify listeners of the resulting status, emit interrupt metrics, and
 * finally interrupt whatever is still running via {@code shutdownNow()}.
 *
 * <p>The task's result is awaited (bounded by the configured graceful shutdown timeout) only when task
 * restoration is enabled and the task reports that it can be restored; otherwise listeners are told the
 * task was canceled.
 */
@Override
@LifecycleStop
public void stop() {
  stopping = true;

  // Orderly shutdown first; the forced interrupt happens at the end of this method.
  if (executorService != null) {
    try {
      executorService.shutdown();
    } catch (SecurityException ex) {
      log.error(ex, "I can't control my own threads!");
    }
  }

  if (runningItem != null) {
    final Task task = runningItem.getTask();
    final long start = System.currentTimeMillis();
    final String taskId = task.getId();
    boolean error = false;

    // stopGracefully is always invoked so the task can clean up its resources.
    log.info("Starting graceful shutdown of task[%s].", taskId);
    task.stopGracefully(taskConfig);

    final boolean awaitResult = taskConfig.isRestoreTasksOnRestart() && task.canRestore();
    if (!awaitResult) {
      // Creating a new status to only feed listeners seems quite strange.
      // This is currently OK because we have no listeners yet registered in peon.
      // However, we should fix this in the near future by always retrieving task status
      // from one single source of truth that is also propagated to the overlord.
      // See https://github.com/apache/druid/issues/11445.
      TaskRunnerUtils.notifyStatusChanged(
          listeners,
          taskId,
          TaskStatus.failure(taskId, "Canceled as task execution process stopped")
      );
    } else {
      try {
        // Wait no longer than the graceful shutdown timeout, measured from 'start'.
        final long timeoutMillis =
            new Interval(DateTimes.utc(start), taskConfig.getGracefulShutdownTimeout()).toDurationMillis();
        final TaskStatus taskStatus = runningItem.getResult().get(timeoutMillis, TimeUnit.MILLISECONDS);

        // The status value itself is irrelevant for graceful shutdowns; it is only forwarded to listeners.
        log.info(
            "Graceful shutdown of task[%s] finished in %,dms.",
            taskId,
            System.currentTimeMillis() - start
        );
        TaskRunnerUtils.notifyStatusChanged(listeners, taskId, taskStatus);
      } catch (Exception e) {
        log.makeAlert(e, "Graceful task shutdown failed: %s", task.getDataSource())
           .addData("taskId", taskId)
           .addData("dataSource", task.getDataSource())
           .emit();
        log.warn(e, "Graceful shutdown of task[%s] aborted with exception.", taskId);
        error = true;
        // Same caveat as the cancel path above: this status is synthesized only for listeners.
        // See https://github.com/apache/druid/issues/11445.
        TaskRunnerUtils.notifyStatusChanged(
            listeners,
            taskId,
            TaskStatus.failure(taskId, "Failed to stop gracefully with exception. See task logs for more details.")
        );
      }
    }

    final long elapsed = System.currentTimeMillis() - start;
    final ServiceMetricEvent.Builder metricBuilder = ServiceMetricEvent
        .builder()
        .setDimension("task", taskId)
        .setDimension("dataSource", task.getDataSource())
        // "graceful" is always "true"; the dimension is kept for backward compatibility.
        .setDimension("graceful", "true")
        .setDimension("error", String.valueOf(error));
    emitter.emit(metricBuilder.build("task/interrupt/count", 1L));
    emitter.emit(metricBuilder.build("task/interrupt/elapsed", elapsed));
  }

  // Ok, now interrupt everything.
  if (executorService != null) {
    try {
      executorService.shutdownNow();
    } catch (SecurityException ex) {
      log.error(ex, "I can't control my own threads!");
    }
  }
}
use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.
the class SingleTaskBackgroundRunner method getQueryRunnerImpl.
/**
 * Builds the query runner for the given query.
 *
 * <p>The running task's own runner is used when a task is running, the query's datasource has a base
 * table, that table's name equals the task's datasource, and the task returns a non-null runner.
 * In every other case a {@link NoopQueryRunner} is used. The chosen runner is always wrapped in a
 * {@link SetAndVerifyContextQueryRunner}.
 *
 * @param query the query to serve
 * @return a context-verifying runner wrapping either the task's runner or a no-op runner
 */
private <T> QueryRunner<T> getQueryRunnerImpl(Query<T> query) {
  QueryRunner<T> runnerFromTask = null;

  if (runningItem != null) {
    final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
    final Task task = runningItem.getTask();

    if (analysis.getBaseTableDataSource().isPresent()) {
      final String baseTableName = analysis.getBaseTableDataSource().get().getName();
      if (task.getDataSource().equals(baseTableName)) {
        // May still be null; the fallback below covers that case.
        runnerFromTask = task.getQueryRunner(query);
      }
    }
  }

  final QueryRunner<T> delegate;
  if (runnerFromTask != null) {
    delegate = runnerFromTask;
  } else {
    delegate = new NoopQueryRunner<>();
  }
  return new SetAndVerifyContextQueryRunner<>(serverConfig, delegate);
}
use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.
the class TaskLockbox method syncFromStorage.
/**
 * Discards the in-memory lock state and rebuilds it from the bundled {@link TaskStorage}.
 *
 * <p>Everything is read from storage before any in-memory state is cleared, so a failure while reading
 * leaves the current locks untouched. Stored locks are then re-added in version order. Locks with an
 * empty interval are skipped with a warning.
 *
 * @throws ISE if a stored lock cannot be re-added (no lock posse could be created or found for it)
 */
public void syncFromStorage() {
  giant.lock();
  try {
    // Load stuff from taskStorage first. If this fails, we don't want to lose all our locks.
    final Set<String> storedActiveTasks = new HashSet<>();
    final List<Pair<Task, TaskLock>> storedLocks = new ArrayList<>();
    for (final Task activeTask : taskStorage.getActiveTasks()) {
      final String activeTaskId = activeTask.getId();
      storedActiveTasks.add(activeTaskId);
      for (final TaskLock storedLock : taskStorage.getLocks(activeTaskId)) {
        storedLocks.add(Pair.of(activeTask, storedLock));
      }
    }

    // Orders locks by version so they are re-added in the order they were acquired.
    // The task id is only a tie-breaker and should not normally be needed.
    final Ordering<Pair<Task, TaskLock>> byVersionOrdering = new Ordering<Pair<Task, TaskLock>>() {
      @Override
      public int compare(Pair<Task, TaskLock> left, Pair<Task, TaskLock> right) {
        return ComparisonChain
            .start()
            .compare(left.rhs.getVersion(), right.rhs.getVersion())
            .compare(left.lhs.getId(), right.lhs.getId())
            .result();
      }
    };

    // Reset the in-memory state before replaying the stored locks.
    running.clear();
    activeTasks.clear();
    activeTasks.addAll(storedActiveTasks);

    // Number of locks re-added to memory; used only for the summary log message.
    int taskLockCount = 0;
    final List<Pair<Task, TaskLock>> locksInVersionOrder = byVersionOrdering.sortedCopy(storedLocks);
    for (final Pair<Task, TaskLock> entry : locksInVersionOrder) {
      final Task task = Preconditions.checkNotNull(entry.lhs, "task");
      final TaskLock savedTaskLock = Preconditions.checkNotNull(entry.rhs, "savedTaskLock");

      if (savedTaskLock.getInterval().toDurationMillis() <= 0) {
        // "Impossible", but you never know what crazy stuff can be restored from storage.
        log.warn("Ignoring lock[%s] with empty interval for task: %s", savedTaskLock, task.getId());
        continue;
      }

      // Locks stored without a priority inherit the task's priority, so that every in-memory lock has one.
      final TaskLock savedTaskLockWithPriority;
      if (savedTaskLock.getPriority() == null) {
        savedTaskLockWithPriority = savedTaskLock.withPriority(task.getPriority());
      } else {
        savedTaskLockWithPriority = savedTaskLock;
      }

      final TaskLockPosse taskLockPosse = verifyAndCreateOrFindLockPosse(task, savedTaskLockWithPriority);
      if (taskLockPosse == null) {
        throw new ISE(
            "Could not reacquire lock on interval[%s] version[%s] for task: %s",
            savedTaskLockWithPriority.getInterval(),
            savedTaskLockWithPriority.getVersion(),
            task.getId()
        );
      }

      taskLockPosse.addTask(task);
      final TaskLock taskLock = taskLockPosse.getTaskLock();
      final boolean sameVersion = savedTaskLockWithPriority.getVersion().equals(taskLock.getVersion());

      // The lock is counted whether or not the version matched; only the log message differs.
      taskLockCount++;
      if (sameVersion) {
        log.info("Reacquired lock[%s] for task: %s", taskLock, task.getId());
      } else {
        log.info(
            "Could not reacquire lock on interval[%s] version[%s] (got version[%s] instead) for task: %s",
            savedTaskLockWithPriority.getInterval(),
            savedTaskLockWithPriority.getVersion(),
            taskLock.getVersion(),
            task.getId()
        );
      }
    }

    log.info(
        "Synced %,d locks for %,d activeTasks from storage (%,d locks ignored).",
        taskLockCount,
        activeTasks.size(),
        storedLocks.size() - taskLockCount
    );
  } finally {
    giant.unlock();
  }
}
use of org.apache.druid.indexing.common.task.Task in project druid by druid-io.
the class TaskLockbox method revokeLock.
/**
 * Marks the given lock as revoked for the given task.
 *
 * <p>Revoked locks are NOT removed here: the revoked copy replaces the original both in
 * {@link #taskStorage} and in {@link #running}, so that later acquisition attempts can detect the
 * revocation and report it to callers. Removal happens through {@link #unlock(Task, Interval)}.
 *
 * <p>If the lock is already revoked, only a warning is logged.
 *
 * @param taskId an id of the task holding the lock
 * @param lock   lock to be revoked
 * @throws ISE if the task is not active, is unknown to task storage, or no in-memory lock posse matches
 *             the lock
 */
@VisibleForTesting
protected void revokeLock(String taskId, TaskLock lock) {
  giant.lock();
  try {
    if (!activeTasks.contains(taskId)) {
      throw new ISE("Cannot revoke lock for inactive task[%s]", taskId);
    }

    final Task task = taskStorage.getTask(taskId).orNull();
    if (task == null) {
      throw new ISE("Cannot revoke lock for unknown task[%s]", taskId);
    }

    log.info("Revoking task lock[%s] for task[%s]", lock, taskId);

    if (lock.isRevoked()) {
      log.warn("TaskLock[%s] is already revoked", lock);
      return;
    }

    // Storage is updated first, then the in-memory posse is swapped for one holding the revoked copy.
    final TaskLock revokedLock = lock.revokedCopy();
    taskStorage.replaceLock(taskId, lock, revokedLock);

    final Interval lockInterval = lock.getInterval();
    final List<TaskLockPosse> possesHolder =
        running.get(task.getDataSource()).get(lockInterval.getStart()).get(lockInterval);

    // Find the first posse whose lock equals the (not yet revoked) lock.
    TaskLockPosse foundPosse = null;
    for (TaskLockPosse candidate : possesHolder) {
      if (candidate.getTaskLock().equals(lock)) {
        foundPosse = candidate;
        break;
      }
    }
    if (foundPosse == null) {
      throw new ISE("Failed to find lock posse for lock[%s]", lock);
    }

    possesHolder.remove(foundPosse);
    possesHolder.add(foundPosse.withTaskLock(revokedLock));
    log.info("Revoked taskLock[%s]", lock);
  } finally {
    giant.unlock();
  }
}
Aggregations