Use of io.druid.indexing.common.TaskLock in project druid by druid-io: class RealtimeIndexTask, method run().
@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
  runThread = Thread.currentThread();
  if (this.plumber != null) {
    throw new IllegalStateException("WTF?!? run with non-null plumber??!");
  }
  boolean normalExit = true;

  // It would be nice to get the PlumberSchool in the constructor. Although that will need jackson injectables for
  // stuff like the ServerView, which seems kind of odd? Perhaps revisit this when Guice has been introduced.
  final SegmentPublisher segmentPublisher = new TaskActionSegmentPublisher(this, toolbox);

  // NOTE: We talk to the coordinator in various places in the plumber and we could be more robust to issues
  // with the coordinator. Right now, we'll block/throw in whatever thread triggered the coordinator behavior,
  // which will typically be either the main data processing loop or the persist thread.

  // Wrap default DataSegmentAnnouncer such that we unlock intervals as we unannounce segments
  final DataSegmentAnnouncer lockingSegmentAnnouncer = new DataSegmentAnnouncer() {
    @Override
    public void announceSegment(final DataSegment segment) throws IOException {
      // Side effect: Calling announceSegment causes a lock to be acquired
      toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval()));
      toolbox.getSegmentAnnouncer().announceSegment(segment);
    }

    @Override
    public void unannounceSegment(final DataSegment segment) throws IOException {
      try {
        toolbox.getSegmentAnnouncer().unannounceSegment(segment);
      } finally {
        toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
      }
    }

    @Override
    public void announceSegments(Iterable<DataSegment> segments) throws IOException {
      // Side effect: Calling announceSegments causes locks to be acquired
      for (DataSegment segment : segments) {
        toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval()));
      }
      toolbox.getSegmentAnnouncer().announceSegments(segments);
    }

    @Override
    public void unannounceSegments(Iterable<DataSegment> segments) throws IOException {
      try {
        toolbox.getSegmentAnnouncer().unannounceSegments(segments);
      } finally {
        for (DataSegment segment : segments) {
          toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
        }
      }
    }

    @Override
    public boolean isAnnounced(DataSegment segment) {
      return toolbox.getSegmentAnnouncer().isAnnounced(segment);
    }
  };

  // NOTE: getVersion will block if there is lock contention, which will block plumber.getSink
  // NOTE: (and thus the firehose)
  // Shouldn't usually happen, since we don't expect people to submit tasks that intersect with the
  // realtime window, but if they do it can be problematic. If we decide to care, we can use more threads in
  // the plumber such that waiting for the coordinator doesn't block data processing.
  final VersioningPolicy versioningPolicy = new VersioningPolicy() {
    @Override
    public String getVersion(final Interval interval) {
      try {
        // Side effect: Calling getVersion causes a lock to be acquired
        final TaskLock myLock = toolbox.getTaskActionClient().submit(new LockAcquireAction(interval));
        return myLock.getVersion();
      } catch (IOException e) {
        throw Throwables.propagate(e);
      }
    }
  };

  DataSchema dataSchema = spec.getDataSchema();
  RealtimeIOConfig realtimeIOConfig = spec.getIOConfig();
  RealtimeTuningConfig tuningConfig = spec.getTuningConfig()
      .withBasePersistDirectory(new File(toolbox.getTaskWorkDir(), "persist"))
      .withVersioningPolicy(versioningPolicy);

  final FireDepartment fireDepartment = new FireDepartment(dataSchema, realtimeIOConfig, tuningConfig);
  this.metrics = fireDepartment.getMetrics();
  final RealtimeMetricsMonitor metricsMonitor = new RealtimeMetricsMonitor(
      ImmutableList.of(fireDepartment),
      ImmutableMap.of(DruidMetrics.TASK_ID, new String[]{getId()})
  );
  this.queryRunnerFactoryConglomerate = toolbox.getQueryRunnerFactoryConglomerate();

  // NOTE: This pusher selects path based purely on global configuration and the DataSegment, which means
  // NOTE: that redundant realtime tasks will upload to the same location. This can cause index.zip
  // NOTE: (partitionNum_index.zip for HDFS data storage) and descriptor.json (partitionNum_descriptor.json for
  // NOTE: HDFS data storage) to mismatch, or it can cause historical nodes to load different instances of
  // NOTE: the "same" segment.
  final PlumberSchool plumberSchool = new RealtimePlumberSchool(
      toolbox.getEmitter(),
      toolbox.getQueryRunnerFactoryConglomerate(),
      toolbox.getSegmentPusher(),
      lockingSegmentAnnouncer,
      segmentPublisher,
      toolbox.getSegmentHandoffNotifierFactory(),
      toolbox.getQueryExecutorService(),
      toolbox.getIndexMerger(),
      toolbox.getIndexMergerV9(),
      toolbox.getIndexIO(),
      toolbox.getCache(),
      toolbox.getCacheConfig(),
      toolbox.getObjectMapper()
  );
  this.plumber = plumberSchool.findPlumber(dataSchema, tuningConfig, metrics);

  Supplier<Committer> committerSupplier = null;
  try {
    plumber.startJob();

    // Set up metrics emission
    toolbox.getMonitorScheduler().addMonitor(metricsMonitor);

    // Delay firehose connection to avoid claiming input resources while the plumber is starting up.
    final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
    final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);

    // Skip connecting firehose if we've been stopped before we got started.
    synchronized (this) {
      if (!gracefullyStopped) {
        firehose = firehoseFactory.connect(spec.getDataSchema().getParser());
        committerSupplier = Committers.supplierFromFirehose(firehose);
      }
    }

    // Time to read data!
    while (firehose != null && (!gracefullyStopped || firehoseDrainableByClosing) && firehose.hasMore()) {
      Plumbers.addNextRow(committerSupplier, firehose, plumber, tuningConfig.isReportParseExceptions(), metrics);
    }
  } catch (Throwable e) {
    normalExit = false;
    log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
    throw e;
  } finally {
    if (normalExit) {
      try {
        // Persist if we had actually started.
        if (firehose != null) {
          log.info("Persisting remaining data.");

          final Committer committer = committerSupplier.get();
          final CountDownLatch persistLatch = new CountDownLatch(1);
          plumber.persist(
              new Committer() {
                @Override
                public Object getMetadata() {
                  return committer.getMetadata();
                }

                @Override
                public void run() {
                  try {
                    committer.run();
                  } finally {
                    persistLatch.countDown();
                  }
                }
              }
          );
          persistLatch.await();
        }

        if (gracefullyStopped) {
          log.info("Gracefully stopping.");
        } else {
          log.info("Finishing the job.");
          synchronized (this) {
            if (gracefullyStopped) {
              // Someone called stopGracefully after we checked the flag. That's okay, just stop now.
              log.info("Gracefully stopping.");
            } else {
              finishingJob = true;
            }
          }
          if (finishingJob) {
            plumber.finishJob();
          }
        }
      } catch (InterruptedException e) {
        log.debug(e, "Interrupted while finishing the job");
      } catch (Exception e) {
        log.makeAlert(e, "Failed to finish realtime task").emit();
        throw e;
      } finally {
        if (firehose != null) {
          CloseQuietly.close(firehose);
        }
        toolbox.getMonitorScheduler().removeMonitor(metricsMonitor);
      }
    }
  }

  log.info("Job done!");
  return TaskStatus.success(getId());
}
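The TaskLock-relevant part of this method is the wrapping of the default DataSegmentAnnouncer so that every announcement acquires an interval lock and every unannouncement releases it, even if the delegate throws. Below is a minimal, self-contained sketch of that wrap-and-release-in-finally pattern; the Announcer and LockClient interfaces are simplified stand-ins invented for illustration, not Druid APIs.

// Sketch of the "lock on announce, unlock on unannounce" wrapper, assuming simplified interfaces.
import java.io.IOException;

public class LockingAnnouncerSketch {

  interface Announcer {
    void announce(String segmentId) throws IOException;
    void unannounce(String segmentId) throws IOException;
  }

  interface LockClient {
    void acquire(String interval) throws IOException;
    void release(String interval) throws IOException;
  }

  /** Wraps a delegate so every announce acquires a lock and every unannounce releases it, even on failure. */
  static Announcer locking(final Announcer delegate, final LockClient locks, final String interval) {
    return new Announcer() {
      @Override
      public void announce(String segmentId) throws IOException {
        locks.acquire(interval);       // side effect first, mirroring LockAcquireAction above
        delegate.announce(segmentId);
      }

      @Override
      public void unannounce(String segmentId) throws IOException {
        try {
          delegate.unannounce(segmentId);
        } finally {
          locks.release(interval);     // always release, mirroring LockReleaseAction in the finally block
        }
      }
    };
  }
}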
Use of io.druid.indexing.common.TaskLock in project druid by druid-io: class RestoreTask, method run().
@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception {
  // Confirm we have a lock (will throw if there isn't exactly one element)
  final TaskLock myLock = Iterables.getOnlyElement(getTaskLocks(toolbox));

  if (!myLock.getDataSource().equals(getDataSource())) {
    throw new ISE("WTF?! Lock dataSource[%s] != task dataSource[%s]", myLock.getDataSource(), getDataSource());
  }

  if (!myLock.getInterval().equals(getInterval())) {
    throw new ISE("WTF?! Lock interval[%s] != task interval[%s]", myLock.getInterval(), getInterval());
  }

  // List unused segments
  final List<DataSegment> unusedSegments = toolbox.getTaskActionClient()
      .submit(new SegmentListUnusedAction(myLock.getDataSource(), myLock.getInterval()));

  // Verify none of these segments have versions > lock version
  for (final DataSegment unusedSegment : unusedSegments) {
    if (unusedSegment.getVersion().compareTo(myLock.getVersion()) > 0) {
      throw new ISE(
          "WTF?! Unused segment[%s] has version[%s] > task version[%s]",
          unusedSegment.getIdentifier(),
          unusedSegment.getVersion(),
          myLock.getVersion()
      );
    }
    log.info("OK to restore segment: %s", unusedSegment.getIdentifier());
  }

  final List<DataSegment> restoredSegments = new ArrayList<>();

  // Move segments
  for (DataSegment segment : unusedSegments) {
    final DataSegment restored = toolbox.getDataSegmentArchiver().restore(segment);
    if (restored != null) {
      restoredSegments.add(restored);
    } else {
      log.info("Segment [%s] did not move, not updating metadata", segment);
    }
  }

  if (restoredSegments.isEmpty()) {
    log.info("No segments restored");
  } else {
    // Update metadata for moved segments
    toolbox.getTaskActionClient().submit(new SegmentMetadataUpdateAction(ImmutableSet.copyOf(restoredSegments)));
  }

  return TaskStatus.success(getId());
}
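Here the lock is used purely as a guard: the task refuses to restore anything whose version string is lexicographically newer than the lock's version. The sketch below isolates that precondition check; Lock and Segment are simplified value classes invented for illustration, not the Druid TaskLock and DataSegment types.

// Sketch of the version precondition check, assuming simplified Lock/Segment value classes.
import java.util.List;

public class RestorePreconditionSketch {

  static final class Lock {
    final String version;
    Lock(String version) { this.version = version; }
  }

  static final class Segment {
    final String id;
    final String version;
    Segment(String id, String version) { this.id = id; this.version = version; }
  }

  /** Throws if any unused segment carries a version newer than the lock's version. */
  static void checkVersions(Lock lock, List<Segment> unusedSegments) {
    for (Segment segment : unusedSegments) {
      // Versions are plain strings compared lexicographically, like DataSegment.getVersion().compareTo(...)
      if (segment.version.compareTo(lock.version) > 0) {
        throw new IllegalStateException(
            "Unused segment[" + segment.id + "] has version[" + segment.version
            + "] > task version[" + lock.version + "]");
      }
    }
  }
}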
Use of io.druid.indexing.common.TaskLock in project druid by druid-io: class ArchiveTask, method run().
@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception {
  // Confirm we have a lock (will throw if there isn't exactly one element)
  final TaskLock myLock = Iterables.getOnlyElement(getTaskLocks(toolbox));

  if (!myLock.getDataSource().equals(getDataSource())) {
    throw new ISE("WTF?! Lock dataSource[%s] != task dataSource[%s]", myLock.getDataSource(), getDataSource());
  }

  if (!myLock.getInterval().equals(getInterval())) {
    throw new ISE("WTF?! Lock interval[%s] != task interval[%s]", myLock.getInterval(), getInterval());
  }

  // List unused segments
  final List<DataSegment> unusedSegments = toolbox.getTaskActionClient()
      .submit(new SegmentListUnusedAction(myLock.getDataSource(), myLock.getInterval()));

  // Verify none of these segments have versions > lock version
  for (final DataSegment unusedSegment : unusedSegments) {
    if (unusedSegment.getVersion().compareTo(myLock.getVersion()) > 0) {
      throw new ISE(
          "WTF?! Unused segment[%s] has version[%s] > task version[%s]",
          unusedSegment.getIdentifier(),
          unusedSegment.getVersion(),
          myLock.getVersion()
      );
    }
    log.info("OK to archive segment: %s", unusedSegment.getIdentifier());
  }

  // Move segments
  for (DataSegment segment : unusedSegments) {
    final DataSegment archivedSegment = toolbox.getDataSegmentArchiver().archive(segment);
    if (archivedSegment != null) {
      toolbox.getTaskActionClient().submit(new SegmentMetadataUpdateAction(ImmutableSet.of(archivedSegment)));
    } else {
      log.info("No action was taken for [%s]", segment);
    }
  }

  return TaskStatus.success(getId());
}
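ArchiveTask applies the same lock checks as RestoreTask and then updates segment metadata only for segments the archiver actually moved (a null return means nothing happened). The sketch below isolates that act-then-conditionally-update pattern; Archiver and MetadataClient are illustrative stand-ins, not Druid interfaces.

// Sketch of "only update metadata for segments that actually moved", assuming stand-in interfaces.
import java.util.List;

public class ArchiveSketch {

  interface Archiver {
    String archive(String segmentId);          // returns null if the segment did not move
  }

  interface MetadataClient {
    void markArchived(String archivedSegmentId);
  }

  static void archiveAll(List<String> segmentIds, Archiver archiver, MetadataClient metadata) {
    for (String segmentId : segmentIds) {
      final String archived = archiver.archive(segmentId);
      if (archived != null) {
        metadata.markArchived(archived);       // mirrors SegmentMetadataUpdateAction for the moved segment
      }
      // When archive() returns null the segment did not move, so the metadata store is left untouched.
    }
  }
}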
Use of io.druid.indexing.common.TaskLock in project druid by druid-io: class SegmentAllocateAction, method perform().
@Override
public SegmentIdentifier perform(final Task task, final TaskActionToolbox toolbox) throws IOException {
  int attempt = 0;
  while (true) {
    attempt++;

    if (!task.getDataSource().equals(dataSource)) {
      throw new IAE("Task dataSource must match action dataSource, [%s] != [%s].", task.getDataSource(), dataSource);
    }

    final IndexerMetadataStorageCoordinator msc = toolbox.getIndexerMetadataStorageCoordinator();

    // 1) if something overlaps our timestamp, use that
    // 2) otherwise try preferredSegmentGranularity & going progressively smaller
    final List<Interval> tryIntervals = Lists.newArrayList();
    final Interval rowInterval = queryGranularity.bucket(timestamp);
    final Set<DataSegment> usedSegmentsForRow = ImmutableSet.copyOf(msc.getUsedSegmentsForInterval(dataSource, rowInterval));

    if (usedSegmentsForRow.isEmpty()) {
      // No existing segments for this row; try the preferred segment granularity first, and then progressively
      // smaller granularities if allocation fails.
      for (Granularity gran : Granularity.granularitiesFinerThan(preferredSegmentGranularity)) {
        tryIntervals.add(gran.bucket(timestamp));
      }
    } else {
      // Existing segment(s) exist for this row; use the interval of the first one.
      tryIntervals.add(usedSegmentsForRow.iterator().next().getInterval());
    }

    for (final Interval tryInterval : tryIntervals) {
      if (tryInterval.contains(rowInterval)) {
        log.debug("Trying to allocate pending segment for rowInterval[%s], segmentInterval[%s].", rowInterval, tryInterval);
        final TaskLock tryLock = toolbox.getTaskLockbox().tryLock(task, tryInterval).orNull();
        if (tryLock != null) {
          final SegmentIdentifier identifier = msc.allocatePendingSegment(
              dataSource,
              sequenceName,
              previousSegmentId,
              tryInterval,
              tryLock.getVersion()
          );
          if (identifier != null) {
            return identifier;
          } else {
            log.debug("Could not allocate pending segment for rowInterval[%s], segmentInterval[%s].", rowInterval, tryInterval);
          }
        } else {
          log.debug("Could not acquire lock for rowInterval[%s], segmentInterval[%s].", rowInterval, tryInterval);
        }
      }
    }

    if (!ImmutableSet.copyOf(msc.getUsedSegmentsForInterval(dataSource, rowInterval)).equals(usedSegmentsForRow)) {
      if (attempt < MAX_ATTEMPTS) {
        final long shortRandomSleep = 50 + (long) (Math.random() * 450);
        log.debug("Used segment set changed for rowInterval[%s]. Retrying segment allocation in %,dms (attempt = %,d).", rowInterval, shortRandomSleep, attempt);
        try {
          Thread.sleep(shortRandomSleep);
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          throw Throwables.propagate(e);
        }
      } else {
        log.error("Used segment set changed for rowInterval[%s]. Not trying again (attempt = %,d).", rowInterval, attempt);
        return null;
      }
    } else {
      return null;
    }
  }
}
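The notable control flow here is the bounded retry: if the set of used segments for the row's interval changed between the initial read and the failed allocation, the action sleeps for a short random period (50-500 ms) and tries again, up to MAX_ATTEMPTS times; otherwise it gives up immediately. The sketch below isolates that retry-with-jitter loop; allocateOnce and usedSegmentSetChanged are hypothetical suppliers standing in for the allocation attempt and the used-segment comparison, and the MAX_ATTEMPTS value shown is illustrative.

// Sketch of the bounded retry-with-jitter loop, assuming hypothetical supplier callbacks.
import java.util.concurrent.ThreadLocalRandom;
import java.util.function.Supplier;

public class RetryWithJitterSketch {

  static final int MAX_ATTEMPTS = 90;  // illustrative; the real constant is defined in SegmentAllocateAction

  /** Retries allocateOnce while the observed state keeps changing, sleeping 50-500 ms between attempts. */
  static <T> T allocateWithRetry(Supplier<T> allocateOnce, Supplier<Boolean> usedSegmentSetChanged) {
    int attempt = 0;
    while (true) {
      attempt++;
      final T result = allocateOnce.get();
      if (result != null) {
        return result;
      }
      // Retry only if the used-segment set changed underneath us and attempts remain; otherwise give up.
      if (usedSegmentSetChanged.get() && attempt < MAX_ATTEMPTS) {
        final long sleepMs = 50 + ThreadLocalRandom.current().nextLong(450);
        try {
          Thread.sleep(sleepMs);
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          throw new RuntimeException(e);
        }
      } else {
        return null;
      }
    }
  }
}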
Use of io.druid.indexing.common.TaskLock in project druid by druid-io: class TaskLockbox, method tryLock().
/**
 * Attempt to lock a task, without removing it from the queue. Can safely be called multiple times on the same task.
 * This method will attempt to assign version strings that obey the invariant that every version string is
 * lexicographically greater than any other version string previously assigned to the same interval. This invariant
 * is only mostly guaranteed, however; we assume clock monotonicity and we assume that callers specifying
 * {@code preferredVersion} are doing the right thing.
 *
 * @param task             task that wants a lock
 * @param interval         interval to lock
 * @param preferredVersion use this version string if one has not yet been assigned
 *
 * @return lock version if lock was acquired, absent otherwise
 *
 * @throws IllegalStateException if the task is not a valid active task
 */
private Optional<TaskLock> tryLock(final Task task, final Interval interval, final Optional<String> preferredVersion) {
  giant.lock();
  try {
    if (!activeTasks.contains(task.getId())) {
      throw new ISE("Unable to grant lock to inactive Task [%s]", task.getId());
    }
    Preconditions.checkArgument(interval.toDurationMillis() > 0, "interval empty");

    final String dataSource = task.getDataSource();
    final List<TaskLockPosse> foundPosses = findLockPossesForInterval(dataSource, interval);
    final TaskLockPosse posseToUse;

    if (foundPosses.size() > 1) {
      // Too many existing locks.
      return Optional.absent();
    } else if (foundPosses.size() == 1) {
      // One existing lock -- check if we can add to it.
      final TaskLockPosse foundPosse = Iterables.getOnlyElement(foundPosses);
      if (foundPosse.getTaskLock().getInterval().contains(interval)
          && foundPosse.getTaskLock().getGroupId().equals(task.getGroupId())) {
        posseToUse = foundPosse;
      } else {
        return Optional.absent();
      }
    } else {
      // No existing locks. We can make a new one.
      if (!running.containsKey(dataSource)) {
        running.put(dataSource, new TreeMap<Interval, TaskLockPosse>(Comparators.intervalsByStartThenEnd()));
      }

      // Create new TaskLock and assign it a version.
      // Assumption: We'll choose a version that is greater than any previously-chosen version for our interval. (This
      // may not always be true, unfortunately. See below.)
      final String version;
      if (preferredVersion.isPresent()) {
        // We have a preferred version. We'll trust our caller to not break our ordering assumptions and just use it.
        version = preferredVersion.get();
      } else {
        // We are running under an interval lock right now, so just using the current time works as long as we can trust
        // our clock to be monotonic and have enough resolution since the last time we created a TaskLock for the same
        // interval. This may not always be true; to assure it we would need to use some method of timekeeping other
        // than the wall clock.
        version = new DateTime().toString();
      }

      posseToUse = new TaskLockPosse(new TaskLock(task.getGroupId(), dataSource, interval, version));
      running.get(dataSource).put(interval, posseToUse);
      log.info("Created new TaskLockPosse: %s", posseToUse);
    }

    // Add to existing TaskLockPosse, if necessary
    if (posseToUse.getTaskIds().add(task.getId())) {
      log.info("Added task[%s] to TaskLock[%s]", task.getId(), posseToUse.getTaskLock().getGroupId());

      // Update task storage facility. If it fails, revoke the lock.
      try {
        taskStorage.addLock(task.getId(), posseToUse.getTaskLock());
        return Optional.of(posseToUse.getTaskLock());
      } catch (Exception e) {
        log.makeAlert("Failed to persist lock in storage")
           .addData("task", task.getId())
           .addData("dataSource", posseToUse.getTaskLock().getDataSource())
           .addData("interval", posseToUse.getTaskLock().getInterval())
           .addData("version", posseToUse.getTaskLock().getVersion())
           .emit();
        unlock(task, interval);
        return Optional.absent();
      }
    } else {
      log.info("Task[%s] already present in TaskLock[%s]", task.getId(), posseToUse.getTaskLock().getGroupId());
      return Optional.of(posseToUse.getTaskLock());
    }
  } finally {
    giant.unlock();
  }
}
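When no preferredVersion is given, the version is simply new DateTime().toString(), and the "newest version wins" checks elsewhere on this page compare those strings with compareTo. The small self-contained example below illustrates why that works: ISO-8601 timestamps in a fixed time zone sort lexicographically in the same order as the instants they represent, which is exactly the monotonicity assumption the javadoc spells out.

// Self-contained illustration of why ISO-8601 timestamp strings work as ordered lock versions.
public class VersionOrderingSketch {
  public static void main(String[] args) {
    String earlier = "2017-01-01T00:00:00.000Z";
    String later   = "2017-01-01T00:00:00.001Z";
    // Lexicographic comparison agrees with chronological order for fixed-zone ISO-8601 strings,
    // so checks like unusedSegment.getVersion().compareTo(myLock.getVersion()) behave as expected.
    System.out.println(earlier.compareTo(later) < 0);  // prints true
  }
}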