Use of org.apache.druid.indexing.common.TaskLock in project druid by druid-io.
From the class HadoopIndexTask, method isReady.
@Override
public boolean isReady(TaskActionClient taskActionClient) throws Exception
{
  Iterable<Interval> intervals = spec.getDataSchema().getGranularitySpec().sortedBucketIntervals();
  if (intervals.iterator().hasNext()) {
    Interval interval = JodaUtils.umbrellaInterval(JodaUtils.condenseIntervals(intervals));
    // Try to acquire an exclusive time-chunk lock over the umbrella interval of all bucket intervals.
    final TaskLock lock = taskActionClient.submit(
        new TimeChunkLockTryAcquireAction(TaskLockType.EXCLUSIVE, interval)
    );
    if (lock == null) {
      // The lock is not available yet; the task is not ready to run.
      return false;
    }
    if (lock.isRevoked()) {
      throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", interval));
    }
    return true;
  } else {
    // No bucket intervals means there is nothing to lock up front.
    return true;
  }
}
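The pattern above (try to acquire, treat null as "not ready", and fail fast on a revoked lock) can be factored into a small helper. The sketch below is illustrative only: LockHelper and tryAcquireExclusiveLock are hypothetical names, and the import paths for TaskActionClient, TimeChunkLockTryAcquireAction, and ISE are assumed from the Druid codebase rather than confirmed by this snippet.

import org.apache.druid.indexing.common.TaskLock;
import org.apache.druid.indexing.common.TaskLockType;
import org.apache.druid.indexing.common.actions.TaskActionClient;
import org.apache.druid.indexing.common.actions.TimeChunkLockTryAcquireAction;
import org.apache.druid.java.util.common.ISE;
import org.joda.time.Interval;

/**
 * Hypothetical helper: tries to acquire an exclusive time-chunk lock and returns true
 * if the lock was granted and is still valid, false if it is not yet available, and
 * throws if a previously granted lock has been revoked.
 */
public final class LockHelper
{
  private LockHelper() {}

  public static boolean tryAcquireExclusiveLock(TaskActionClient client, Interval interval) throws Exception
  {
    final TaskLock lock = client.submit(new TimeChunkLockTryAcquireAction(TaskLockType.EXCLUSIVE, interval));
    if (lock == null) {
      return false; // another task holds the time chunk; retry later
    }
    if (lock.isRevoked()) {
      throw new ISE("Lock for interval [%s] was revoked.", interval);
    }
    return true;
  }
}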
Use of org.apache.druid.indexing.common.TaskLock in project druid by druid-io.
From the class TaskLocks, method findLocksForSegments.
public static List<TaskLock> findLocksForSegments(
    final Task task,
    final TaskLockbox taskLockbox,
    final Collection<DataSegment> segments
)
{
  final NavigableMap<DateTime, List<TaskLock>> taskLockMap = getTaskLockMap(taskLockbox, task);
  if (taskLockMap.isEmpty()) {
    return Collections.emptyList();
  }
  final List<TaskLock> found = new ArrayList<>();
  segments.forEach(segment -> {
    // Locks are keyed by their interval's start time; floorEntry finds the candidate locks
    // whose interval starts at or before the segment's start.
    final Entry<DateTime, List<TaskLock>> entry = taskLockMap.floorEntry(segment.getInterval().getStart());
    if (entry == null) {
      throw new ISE("Can't find lock for the interval of segment[%s]", segment.getId());
    }
    final List<TaskLock> locks = entry.getValue();
    locks.forEach(lock -> {
      if (lock.getGranularity() == LockGranularity.TIME_CHUNK) {
        final TimeChunkLock timeChunkLock = (TimeChunkLock) lock;
        if (timeChunkLock.getInterval().contains(segment.getInterval())
            && timeChunkLock.getDataSource().equals(segment.getDataSource())
            && timeChunkLock.getVersion().compareTo(segment.getVersion()) >= 0) {
          found.add(lock);
        }
      } else {
        final SegmentLock segmentLock = (SegmentLock) lock;
        if (segmentLock.getInterval().contains(segment.getInterval())
            && segmentLock.getDataSource().equals(segment.getDataSource())
            && segmentLock.getVersion().compareTo(segment.getVersion()) >= 0
            && segmentLock.getPartitionId() == segment.getShardSpec().getPartitionNum()) {
          found.add(lock);
        }
      }
    });
  });
  return found;
}
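findLocksForSegments relies on a NavigableMap keyed by each lock interval's start time, so floorEntry returns the candidate locks for a segment's start. Below is a minimal, self-contained sketch of that lookup pattern using only JDK types; the Slot record and all names in it are hypothetical stand-ins, not Druid classes.

import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;

public class FloorEntryLookupDemo
{
  // Hypothetical stand-in for a lock: an interval [start, end) plus a version.
  record Slot(long start, long end, String version) {}

  public static void main(String[] args)
  {
    // Keyed by interval start, mirroring getTaskLockMap in TaskLocks.
    final NavigableMap<Long, List<Slot>> byStart = new TreeMap<>();
    byStart.put(0L, List.of(new Slot(0, 100, "v1")));
    byStart.put(100L, List.of(new Slot(100, 200, "v2")));

    // A "segment" covering [120, 150) is matched by the lock starting at 100.
    final long segmentStart = 120;
    final long segmentEnd = 150;
    final Map.Entry<Long, List<Slot>> entry = byStart.floorEntry(segmentStart);
    if (entry == null) {
      throw new IllegalStateException("No candidate lock starts at or before " + segmentStart);
    }
    entry.getValue().stream()
         .filter(slot -> slot.start() <= segmentStart && segmentEnd <= slot.end())
         .forEach(slot -> System.out.println("Covering lock version: " + slot.version()));
  }
}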
Use of org.apache.druid.indexing.common.TaskLock in project druid by druid-io.
From the class SinglePhaseParallelIndexTaskRunner, method findIntervalAndVersion.
private NonnullPair<Interval, String> findIntervalAndVersion(DateTime timestamp) throws IOException
{
  final GranularitySpec granularitySpec = getIngestionSchema().getDataSchema().getGranularitySpec();
  // This method is called whenever subtasks need to allocate a new segment via the supervisor task.
  // As a result, this code is never called in the Overlord. For now, using the materialized intervals
  // here is ok for performance reasons.
  final Set<Interval> materializedBucketIntervals = granularitySpec.materializedBucketIntervals();
  // List locks whenever allocating a new segment because locks might be revoked and no longer valid.
  final List<TaskLock> locks = getToolbox().getTaskActionClient().submit(new LockListAction());
  final TaskLock revokedLock = locks.stream().filter(TaskLock::isRevoked).findAny().orElse(null);
  if (revokedLock != null) {
    throw new ISE("Lock revoked: [%s]", revokedLock);
  }
  final Map<Interval, String> versions = locks
      .stream()
      .collect(Collectors.toMap(TaskLock::getInterval, TaskLock::getVersion));
  Interval interval;
  String version;
  if (!materializedBucketIntervals.isEmpty()) {
    // If the granularity spec has explicit intervals, we just need to find the version associated with the
    // interval, because we should have gotten all required locks up front when the task started.
    final Optional<Interval> maybeInterval = granularitySpec.bucketInterval(timestamp);
    if (!maybeInterval.isPresent()) {
      throw new IAE("Could not find interval for timestamp [%s]", timestamp);
    }
    interval = maybeInterval.get();
    if (!materializedBucketIntervals.contains(interval)) {
      throw new ISE("Unspecified interval[%s] in granularitySpec[%s]", interval, granularitySpec);
    }
    version = ParallelIndexSupervisorTask.findVersion(versions, interval);
    if (version == null) {
      throw new ISE("Cannot find a version for interval[%s]", interval);
    }
  } else {
    // We don't have explicit intervals. We can use the segment granularity to figure out what
    // interval we need, but we might not have locked it yet.
    interval = granularitySpec.getSegmentGranularity().bucket(timestamp);
    version = ParallelIndexSupervisorTask.findVersion(versions, interval);
    if (version == null) {
      final int maxAllowedLockCount = getIngestionSchema().getTuningConfig().getMaxAllowedLockCount();
      if (maxAllowedLockCount >= 0 && locks.size() >= maxAllowedLockCount) {
        throw new MaxAllowedLocksExceededException(maxAllowedLockCount);
      }
      // We don't have a lock for this interval, so we should lock it now.
      final TaskLock lock = Preconditions.checkNotNull(
          getToolbox().getTaskActionClient().submit(
              new TimeChunkLockTryAcquireAction(TaskLockType.EXCLUSIVE, interval)
          ),
          "Cannot acquire a lock for interval[%s]",
          interval
      );
      if (lock.isRevoked()) {
        throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", interval));
      }
      version = lock.getVersion();
    }
  }
  return new NonnullPair<>(interval, version);
}
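The snippet resolves a version by looking up a lock whose interval covers the target interval. The sketch below approximates what a findVersion-style lookup could look like under that assumption; it is a hypothetical helper, not the actual ParallelIndexSupervisorTask.findVersion implementation.

import java.util.Map;
import org.joda.time.Interval;

public final class VersionLookup
{
  private VersionLookup() {}

  /**
   * Hypothetical sketch: return the version of the first locked interval that contains
   * the target interval, or null when no lock covers it. This only approximates how
   * ParallelIndexSupervisorTask.findVersion is used above.
   */
  public static String findVersion(Map<Interval, String> versions, Interval target)
  {
    return versions.entrySet()
                   .stream()
                   .filter(entry -> entry.getKey().contains(target))
                   .map(Map.Entry::getValue)
                   .findFirst()
                   .orElse(null);
  }
}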
Use of org.apache.druid.indexing.common.TaskLock in project druid by druid-io.
From the class PartialSegmentMergeTask, method runTask.
@Override
public TaskStatus runTask(TaskToolbox toolbox) throws Exception {
  // Group partitionLocations by interval and partitionId.
  final Map<Interval, Int2ObjectMap<List<PartitionLocation>>> intervalToBuckets = new HashMap<>();
  for (PartitionLocation location : ioConfig.getPartitionLocations()) {
    intervalToBuckets.computeIfAbsent(location.getInterval(), k -> new Int2ObjectOpenHashMap<>())
                     .computeIfAbsent(location.getBucketId(), k -> new ArrayList<>())
                     .add(location);
  }
  final List<TaskLock> locks = toolbox.getTaskActionClient().submit(
      new SurrogateAction<>(supervisorTaskId, new LockListAction())
  );
  final Map<Interval, String> intervalToVersion = Maps.newHashMapWithExpectedSize(locks.size());
  locks.forEach(lock -> {
    if (lock.isRevoked()) {
      throw new ISE("Lock[%s] is revoked", lock);
    }
    final String mustBeNull = intervalToVersion.put(lock.getInterval(), lock.getVersion());
    if (mustBeNull != null) {
      throw new ISE(
          "Unexpected state: Two versions([%s], [%s]) for the same interval[%s]",
          lock.getVersion(),
          mustBeNull,
          lock.getInterval()
      );
    }
  });
  final Stopwatch fetchStopwatch = Stopwatch.createStarted();
  final Map<Interval, Int2ObjectMap<List<File>>> intervalToUnzippedFiles = fetchSegmentFiles(toolbox, intervalToBuckets);
  final long fetchTime = fetchStopwatch.elapsed(TimeUnit.SECONDS);
  fetchStopwatch.stop();
  LOG.info("Fetch took [%s] seconds", fetchTime);
  final ParallelIndexSupervisorTaskClient taskClient = toolbox.getSupervisorTaskClientFactory().build(
      new ClientBasedTaskInfoProvider(toolbox.getIndexingServiceClient()),
      getId(),
      1, // always use a single http thread
      getTuningConfig().getChatHandlerTimeout(),
      getTuningConfig().getChatHandlerNumRetries()
  );
  final File persistDir = toolbox.getPersistDir();
  org.apache.commons.io.FileUtils.deleteQuietly(persistDir);
  FileUtils.mkdirp(persistDir);
  final Set<DataSegment> pushedSegments = mergeAndPushSegments(
      toolbox,
      getDataSchema(),
      getTuningConfig(),
      persistDir,
      intervalToVersion,
      intervalToUnzippedFiles
  );
  taskClient.report(supervisorTaskId, new PushedSegmentsReport(getId(), Collections.emptySet(), pushedSegments, ImmutableMap.of()));
  return TaskStatus.success(getId());
}
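The first loop in runTask builds a two-level map, interval to bucket id to partition locations, via nested computeIfAbsent calls. A self-contained sketch of that grouping pattern with plain JDK maps follows; the Location record is a hypothetical stand-in for PartitionLocation, and the real code uses fastutil's Int2ObjectOpenHashMap for the inner map.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class GroupByIntervalAndBucketDemo
{
  // Hypothetical stand-in for PartitionLocation: an interval label, a bucket id, and a host.
  record Location(String interval, int bucketId, String host) {}

  public static void main(String[] args)
  {
    final List<Location> locations = List.of(
        new Location("2024-01-01/2024-01-02", 0, "hostA"),
        new Location("2024-01-01/2024-01-02", 0, "hostB"),
        new Location("2024-01-01/2024-01-02", 1, "hostC")
    );

    // interval -> bucketId -> locations, mirroring intervalToBuckets in runTask above.
    final Map<String, Map<Integer, List<Location>>> grouped = new HashMap<>();
    for (Location location : locations) {
      grouped.computeIfAbsent(location.interval(), k -> new HashMap<>())
             .computeIfAbsent(location.bucketId(), k -> new ArrayList<>())
             .add(location);
    }
    grouped.forEach((interval, buckets) ->
        buckets.forEach((bucket, locs) ->
            System.out.printf("interval=%s bucket=%d -> %d locations%n", interval, bucket, locs.size())));
  }
}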
Use of org.apache.druid.indexing.common.TaskLock in project druid by druid-io.
From the class TaskLockbox, method syncFromStorage.
/**
* Wipe out our current in-memory state and resync it from our bundled {@link TaskStorage}.
*/
public void syncFromStorage() {
  giant.lock();
  try {
    // Load stuff from taskStorage first. If this fails, we don't want to lose all our locks.
    final Set<String> storedActiveTasks = new HashSet<>();
    final List<Pair<Task, TaskLock>> storedLocks = new ArrayList<>();
    for (final Task task : taskStorage.getActiveTasks()) {
      storedActiveTasks.add(task.getId());
      for (final TaskLock taskLock : taskStorage.getLocks(task.getId())) {
        storedLocks.add(Pair.of(task, taskLock));
      }
    }
    // Sort locks by version, so we add them back in the order they were acquired.
    final Ordering<Pair<Task, TaskLock>> byVersionOrdering = new Ordering<Pair<Task, TaskLock>>() {
      @Override
      public int compare(Pair<Task, TaskLock> left, Pair<Task, TaskLock> right) {
        // The second compare shouldn't be necessary, but, whatever.
        return ComparisonChain.start()
                              .compare(left.rhs.getVersion(), right.rhs.getVersion())
                              .compare(left.lhs.getId(), right.lhs.getId())
                              .result();
      }
    };
    running.clear();
    activeTasks.clear();
    activeTasks.addAll(storedActiveTasks);
    // Bookkeeping for a log message at the end.
    int taskLockCount = 0;
    for (final Pair<Task, TaskLock> taskAndLock : byVersionOrdering.sortedCopy(storedLocks)) {
      final Task task = Preconditions.checkNotNull(taskAndLock.lhs, "task");
      final TaskLock savedTaskLock = Preconditions.checkNotNull(taskAndLock.rhs, "savedTaskLock");
      if (savedTaskLock.getInterval().toDurationMillis() <= 0) {
        // "Impossible", but you never know what crazy stuff can be restored from storage.
        log.warn("Ignoring lock[%s] with empty interval for task: %s", savedTaskLock, task.getId());
        continue;
      }
      // Create a new taskLock if it doesn't have a proper priority,
      // so that every taskLock in memory has a priority.
      final TaskLock savedTaskLockWithPriority = savedTaskLock.getPriority() == null
          ? savedTaskLock.withPriority(task.getPriority())
          : savedTaskLock;
      final TaskLockPosse taskLockPosse = verifyAndCreateOrFindLockPosse(task, savedTaskLockWithPriority);
      if (taskLockPosse != null) {
        taskLockPosse.addTask(task);
        final TaskLock taskLock = taskLockPosse.getTaskLock();
        if (savedTaskLockWithPriority.getVersion().equals(taskLock.getVersion())) {
          taskLockCount++;
          log.info("Reacquired lock[%s] for task: %s", taskLock, task.getId());
        } else {
          taskLockCount++;
          log.info(
              "Could not reacquire lock on interval[%s] version[%s] (got version[%s] instead) for task: %s",
              savedTaskLockWithPriority.getInterval(),
              savedTaskLockWithPriority.getVersion(),
              taskLock.getVersion(),
              task.getId()
          );
        }
      } else {
        throw new ISE(
            "Could not reacquire lock on interval[%s] version[%s] for task: %s",
            savedTaskLockWithPriority.getInterval(),
            savedTaskLockWithPriority.getVersion(),
            task.getId()
        );
      }
    }
    log.info(
        "Synced %,d locks for %,d activeTasks from storage (%,d locks ignored).",
        taskLockCount,
        activeTasks.size(),
        storedLocks.size() - taskLockCount
    );
  } finally {
    giant.unlock();
  }
}
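syncFromStorage re-adds locks ordered by version (with task id as a tie-breaker) so they are reacquired in roughly the order they were originally granted. The sketch below shows the same ordering with a plain JDK Comparator; StoredLock is a hypothetical stand-in for the Pair<Task, TaskLock> used above.

import java.util.Comparator;
import java.util.List;

public class LockOrderingDemo
{
  // Hypothetical stand-in for a (task, lock) pair loaded from storage.
  record StoredLock(String taskId, String version) {}

  public static void main(String[] args)
  {
    final List<StoredLock> stored = List.of(
        new StoredLock("task-b", "2024-01-02T00:00:00.000Z"),
        new StoredLock("task-a", "2024-01-01T00:00:00.000Z"),
        new StoredLock("task-c", "2024-01-01T00:00:00.000Z")
    );

    // Same ordering as byVersionOrdering above: version first, then task id as a tie-breaker.
    final List<StoredLock> sorted = stored.stream()
        .sorted(Comparator.comparing(StoredLock::version).thenComparing(StoredLock::taskId))
        .toList();
    sorted.forEach(lock -> System.out.println(lock.version() + " " + lock.taskId()));
  }
}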