use of org.apache.druid.indexing.common.task.batch.MaxAllowedLocksExceededException in project druid by druid-io.
the class ParallelIndexSupervisorTask method allocateSegment.
// Internal APIs
/**
* Allocate a new {@link SegmentIdWithShardSpec} for a request from {@link SinglePhaseSubTask}.
* The returned segmentIdentifiers have different {@code partitionNum} (thereby different {@link NumberedShardSpec})
* per bucket interval.
*/
@POST
@Path("/segment/allocate")
@Produces(SmileMediaTypes.APPLICATION_JACKSON_SMILE)
@Consumes(SmileMediaTypes.APPLICATION_JACKSON_SMILE)
public Response allocateSegment(Object param, @Context final HttpServletRequest req) {
ChatHandlers.authorizationCheck(req, Action.READ, getDataSource(), authorizerMapper);
if (toolbox == null) {
return Response.status(Response.Status.SERVICE_UNAVAILABLE).entity("task is not running yet").build();
}
ParallelIndexTaskRunner runner = Preconditions.checkNotNull(getCurrentRunner(), "runner");
if (!(runner instanceof SinglePhaseParallelIndexTaskRunner)) {
throw new ISE("Expected [%s], but [%s] is in use", SinglePhaseParallelIndexTaskRunner.class.getName(), runner.getClass().getName());
}
// This context is set in the constructor of ParallelIndexSupervisorTask if it's not set by others.
final boolean useLineageBasedSegmentAllocation = Preconditions.checkNotNull(getContextValue(SinglePhaseParallelIndexTaskRunner.CTX_USE_LINEAGE_BASED_SEGMENT_ALLOCATION_KEY), "useLineageBasedSegmentAllocation in taskContext");
try {
final SegmentIdWithShardSpec segmentIdentifier;
if (useLineageBasedSegmentAllocation) {
SegmentAllocationRequest request = toolbox.getJsonMapper().convertValue(param, SegmentAllocationRequest.class);
segmentIdentifier = ((SinglePhaseParallelIndexTaskRunner) runner).allocateNewSegment(getDataSource(), request.getTimestamp(), request.getSequenceName(), request.getPrevSegmentId());
} else {
DateTime timestamp = toolbox.getJsonMapper().convertValue(param, DateTime.class);
segmentIdentifier = ((SinglePhaseParallelIndexTaskRunner) runner).allocateNewSegment(getDataSource(), timestamp);
}
return Response.ok(toolbox.getJsonMapper().writeValueAsBytes(segmentIdentifier)).build();
} catch (MaxAllowedLocksExceededException malee) {
getCurrentRunner().stopGracefully(malee.getMessage());
return Response.status(Response.Status.BAD_REQUEST).entity(malee.getMessage()).build();
} catch (IOException | IllegalStateException e) {
return Response.serverError().entity(Throwables.getStackTraceAsString(e)).build();
} catch (IllegalArgumentException e) {
return Response.status(Response.Status.BAD_REQUEST).entity(Throwables.getStackTraceAsString(e)).build();
}
}
use of org.apache.druid.indexing.common.task.batch.MaxAllowedLocksExceededException in project druid by druid-io.
the class SinglePhaseParallelIndexTaskRunner method findIntervalAndVersion.
private NonnullPair<Interval, String> findIntervalAndVersion(DateTime timestamp) throws IOException {
final GranularitySpec granularitySpec = getIngestionSchema().getDataSchema().getGranularitySpec();
// This method is called whenever subtasks need to allocate a new segment via the supervisor task.
// As a result, this code is never called in the Overlord. For now using the materialized intervals
// here is ok for performance reasons
final Set<Interval> materializedBucketIntervals = granularitySpec.materializedBucketIntervals();
// List locks whenever allocating a new segment because locks might be revoked and no longer valid.
final List<TaskLock> locks = getToolbox().getTaskActionClient().submit(new LockListAction());
final TaskLock revokedLock = locks.stream().filter(TaskLock::isRevoked).findAny().orElse(null);
if (revokedLock != null) {
throw new ISE("Lock revoked: [%s]", revokedLock);
}
final Map<Interval, String> versions = locks.stream().collect(Collectors.toMap(TaskLock::getInterval, TaskLock::getVersion));
Interval interval;
String version;
if (!materializedBucketIntervals.isEmpty()) {
// If granularity spec has explicit intervals, we just need to find the version associated to the interval.
// This is because we should have gotten all required locks up front when the task starts up.
final Optional<Interval> maybeInterval = granularitySpec.bucketInterval(timestamp);
if (!maybeInterval.isPresent()) {
throw new IAE("Could not find interval for timestamp [%s]", timestamp);
}
interval = maybeInterval.get();
if (!materializedBucketIntervals.contains(interval)) {
throw new ISE("Unspecified interval[%s] in granularitySpec[%s]", interval, granularitySpec);
}
version = ParallelIndexSupervisorTask.findVersion(versions, interval);
if (version == null) {
throw new ISE("Cannot find a version for interval[%s]", interval);
}
} else {
// We don't have explicit intervals. We can use the segment granularity to figure out what
// interval we need, but we might not have already locked it.
interval = granularitySpec.getSegmentGranularity().bucket(timestamp);
version = ParallelIndexSupervisorTask.findVersion(versions, interval);
if (version == null) {
final int maxAllowedLockCount = getIngestionSchema().getTuningConfig().getMaxAllowedLockCount();
if (maxAllowedLockCount >= 0 && locks.size() >= maxAllowedLockCount) {
throw new MaxAllowedLocksExceededException(maxAllowedLockCount);
}
// We don't have a lock for this interval, so we should lock it now.
final TaskLock lock = Preconditions.checkNotNull(getToolbox().getTaskActionClient().submit(new TimeChunkLockTryAcquireAction(TaskLockType.EXCLUSIVE, interval)), "Cannot acquire a lock for interval[%s]", interval);
if (lock.isRevoked()) {
throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", interval));
}
version = lock.getVersion();
}
}
return new NonnullPair<>(interval, version);
}
use of org.apache.druid.indexing.common.task.batch.MaxAllowedLocksExceededException in project druid by druid-io.
the class TaskQueue method manageInternal.
@VisibleForTesting
void manageInternal() {
// Task futures available from the taskRunner
final Map<String, ListenableFuture<TaskStatus>> runnerTaskFutures = new HashMap<>();
for (final TaskRunnerWorkItem workItem : taskRunner.getKnownTasks()) {
runnerTaskFutures.put(workItem.getTaskId(), workItem.getResult());
}
// Copy tasks list, as notifyStatus may modify it.
for (final Task task : ImmutableList.copyOf(tasks)) {
if (!taskFutures.containsKey(task.getId())) {
final ListenableFuture<TaskStatus> runnerTaskFuture;
if (runnerTaskFutures.containsKey(task.getId())) {
runnerTaskFuture = runnerTaskFutures.get(task.getId());
} else {
// Task should be running, so run it.
final boolean taskIsReady;
try {
taskIsReady = task.isReady(taskActionClientFactory.create(task));
} catch (Exception e) {
log.warn(e, "Exception thrown during isReady for task: %s", task.getId());
final String errorMessage;
if (e instanceof MaxAllowedLocksExceededException) {
errorMessage = e.getMessage();
} else {
errorMessage = "Failed while waiting for the task to be ready to run. " + "See overlord logs for more details.";
}
notifyStatus(task, TaskStatus.failure(task.getId(), errorMessage), errorMessage);
continue;
}
if (taskIsReady) {
log.info("Asking taskRunner to run: %s", task.getId());
runnerTaskFuture = taskRunner.run(task);
} else {
// Task.isReady() can internally lock intervals or segments.
// We should release them if the task is not ready.
taskLockbox.unlockAll(task);
continue;
}
}
taskFutures.put(task.getId(), attachCallbacks(task, runnerTaskFuture));
} else if (isTaskPending(task)) {
// if the taskFutures contain this task and this task is pending, also let the taskRunner
// to run it to guarantee it will be assigned to run
// see https://github.com/apache/druid/pull/6991
taskRunner.run(task);
}
}
// Kill tasks that shouldn't be running
final Set<String> knownTaskIds = tasks.stream().map(Task::getId).collect(Collectors.toSet());
final Set<String> tasksToKill = Sets.difference(runnerTaskFutures.keySet(), knownTaskIds);
if (!tasksToKill.isEmpty()) {
log.info("Asking taskRunner to clean up %,d tasks.", tasksToKill.size());
for (final String taskId : tasksToKill) {
try {
taskRunner.shutdown(taskId, "task is not in knownTaskIds[%s]", knownTaskIds);
} catch (Exception e) {
log.warn(e, "TaskRunner failed to clean up task: %s", taskId);
}
}
}
}
use of org.apache.druid.indexing.common.task.batch.MaxAllowedLocksExceededException in project druid by druid-io.
the class AbstractBatchIndexTask method tryTimeChunkLock.
protected boolean tryTimeChunkLock(TaskActionClient client, List<Interval> intervals) throws IOException {
// The given intervals are first converted to align with segment granularity. This is because,
// when an overwriting task finds a version for a given input row, it expects the interval
// associated to each version to be equal or larger than the time bucket where the input row falls in.
// See ParallelIndexSupervisorTask.findVersion().
final Iterator<Interval> intervalIterator;
final Granularity segmentGranularity = getSegmentGranularity();
if (segmentGranularity == null) {
intervalIterator = JodaUtils.condenseIntervals(intervals).iterator();
} else {
IntervalsByGranularity intervalsByGranularity = new IntervalsByGranularity(intervals, segmentGranularity);
// the following is calling a condense that does not materialize the intervals:
intervalIterator = JodaUtils.condensedIntervalsIterator(intervalsByGranularity.granularityIntervalsIterator());
}
// Intervals are already condensed to avoid creating too many locks.
// Intervals are also sorted and thus it's safe to compare only the previous interval and current one for dedup.
Interval prev = null;
int locksAcquired = 0;
while (intervalIterator.hasNext()) {
final Interval cur = intervalIterator.next();
if (prev != null && cur.equals(prev)) {
continue;
}
if (maxAllowedLockCount >= 0 && locksAcquired >= maxAllowedLockCount) {
throw new MaxAllowedLocksExceededException(maxAllowedLockCount);
}
prev = cur;
final TaskLock lock = client.submit(new TimeChunkLockTryAcquireAction(TaskLockType.EXCLUSIVE, cur));
if (lock == null) {
return false;
}
if (lock.isRevoked()) {
throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", cur));
}
locksAcquired++;
}
return true;
}
Aggregations