Use of org.apache.druid.indexing.common.TaskLock in project druid by druid-io.
The class SegmentTransactionalInsertAction, method perform.
/**
* Performs some sanity checks and publishes the given segments.
*/
@Override
public SegmentPublishResult perform(Task task, TaskActionToolbox toolbox) {
final SegmentPublishResult retVal;
if (segments.isEmpty()) {
// No segments were produced, but the task still needs to update metadata with the progress it made.
try {
retVal = toolbox.getIndexerMetadataStorageCoordinator().commitMetadataOnly(dataSource, startMetadata, endMetadata);
} catch (Exception e) {
throw new RuntimeException(e);
}
return retVal;
}
final Set<DataSegment> allSegments = new HashSet<>(segments);
if (segmentsToBeOverwritten != null) {
allSegments.addAll(segmentsToBeOverwritten);
}
if (segmentsToBeDropped != null) {
allSegments.addAll(segmentsToBeDropped);
}
TaskLocks.checkLockCoversSegments(task, toolbox.getTaskLockbox(), allSegments);
if (segmentsToBeOverwritten != null && !segmentsToBeOverwritten.isEmpty()) {
final List<TaskLock> locks = toolbox.getTaskLockbox().findLocksForTask(task);
// Sanity check that the new segments can overwrite the old segments.
if (locks.get(0).getGranularity() == LockGranularity.SEGMENT) {
checkWithSegmentLock();
}
}
try {
retVal = toolbox.getTaskLockbox().doInCriticalSection(
    task,
    allSegments.stream().map(DataSegment::getInterval).collect(Collectors.toList()),
    CriticalAction.<SegmentPublishResult>builder()
        .onValidLocks(
            () -> toolbox.getIndexerMetadataStorageCoordinator()
                .announceHistoricalSegments(segments, segmentsToBeDropped, startMetadata, endMetadata)
        )
        .onInvalidLocks(
            () -> SegmentPublishResult.fail(
                "Invalid task locks. Maybe they are revoked by a higher priority task."
                + " Please check the overlord log for details."
            )
        )
        .build()
);
} catch (Exception e) {
throw new RuntimeException(e);
}
// Emit metrics
final ServiceMetricEvent.Builder metricBuilder = new ServiceMetricEvent.Builder();
IndexTaskUtils.setTaskDimensions(metricBuilder, task);
if (retVal.isSuccess()) {
toolbox.getEmitter().emit(metricBuilder.build("segment/txn/success", 1));
} else {
toolbox.getEmitter().emit(metricBuilder.build("segment/txn/failure", 1));
}
// getSegments() should return an empty set if announceHistoricalSegments() failed
for (DataSegment segment : retVal.getSegments()) {
metricBuilder.setDimension(DruidMetrics.INTERVAL, segment.getInterval().toString());
metricBuilder.setDimension(DruidMetrics.PARTITIONING_TYPE, segment.getShardSpec() == null ? null : segment.getShardSpec().getType());
toolbox.getEmitter().emit(metricBuilder.build("segment/added/bytes", segment.getSize()));
}
return retVal;
}
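For context, a minimal caller-side sketch of how a task might submit this action through its TaskActionClient and react to the result. This is not taken verbatim from the Druid source: the appendAction(...) factory method and its argument order are assumptions, and segmentsToPublish is a hypothetical variable.
// Hypothetical caller-side sketch; SegmentTransactionalInsertAction.appendAction(...)
// and its argument order are assumed, not verified against the Druid API.
SegmentPublishResult result = toolbox.getTaskActionClient().submit(
    SegmentTransactionalInsertAction.appendAction(segmentsToPublish, startMetadata, endMetadata));
if (!result.isSuccess()) {
  // The action returns a failed result (rather than throwing) when the locks were revoked.
  throw new ISE("Failed to publish segments[%s]", result);
}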
Use of org.apache.druid.indexing.common.TaskLock in project druid by druid-io.
The class ArchiveTask, method run.
@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception {
final TaskLock myLock = getAndCheckLock(toolbox);
// List unused segments
final List<DataSegment> unusedSegments = toolbox.getTaskActionClient().submit(new RetrieveUnusedSegmentsAction(myLock.getDataSource(), myLock.getInterval()));
// Verify none of these segments have versions > lock version
for (final DataSegment unusedSegment : unusedSegments) {
if (unusedSegment.getVersion().compareTo(myLock.getVersion()) > 0) {
throw new ISE("Unused segment[%s] has version[%s] > task version[%s]", unusedSegment.getId(), unusedSegment.getVersion(), myLock.getVersion());
}
log.info("OK to archive segment: %s", unusedSegment.getId());
}
// Move segments
for (DataSegment segment : unusedSegments) {
final DataSegment archivedSegment = toolbox.getDataSegmentArchiver().archive(segment);
if (archivedSegment != null) {
toolbox.getTaskActionClient().submit(new SegmentMetadataUpdateAction(ImmutableSet.of(archivedSegment)));
} else {
log.info("No action was taken for [%s]", segment.getId());
}
}
return TaskStatus.success(getId());
}
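The helper getAndCheckLock(toolbox) is not shown above. A plausible sketch, assuming the task holds exactly one time-chunk lock (the same getTaskLocks + Iterables.getOnlyElement pattern HadoopIndexTask uses below); the helper name, signature, and the revocation check are assumptions.
// Plausible sketch of the unshown helper; not the actual Druid implementation.
private TaskLock getAndCheckLock(TaskToolbox toolbox) throws IOException {
  // The task is expected to hold exactly one lock covering its fixed interval.
  final TaskLock myLock = Iterables.getOnlyElement(getTaskLocks(toolbox.getTaskActionClient()));
  if (myLock.isRevoked()) {
    throw new ISE("Lock[%s] was revoked", myLock);
  }
  return myLock;
}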
Use of org.apache.druid.indexing.common.TaskLock in project druid by druid-io.
The class HadoopIndexTask, method runInternal.
@SuppressWarnings("unchecked")
private TaskStatus runInternal(TaskToolbox toolbox) throws Exception {
boolean indexGeneratorJobAttempted = false;
boolean indexGeneratorJobSuccess = false;
HadoopIngestionSpec indexerSchema = null;
try {
registerResourceCloserOnAbnormalExit(config -> killHadoopJob());
String hadoopJobIdFile = getHadoopJobIdFileName();
final ClassLoader loader = buildClassLoader(toolbox);
boolean determineIntervals = spec.getDataSchema().getGranularitySpec().inputIntervals().isEmpty();
HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(spec, jsonMapper, new OverlordActionBasedUsedSegmentsRetriever(toolbox));
Object determinePartitionsInnerProcessingRunner = getForeignClassloaderObject(
    "org.apache.druid.indexing.common.task.HadoopIndexTask$HadoopDetermineConfigInnerProcessingRunner",
    loader);
determinePartitionsStatsGetter = new InnerProcessingStatsGetter(determinePartitionsInnerProcessingRunner);
String[] determinePartitionsInput = new String[] {
    toolbox.getJsonMapper().writeValueAsString(spec),
    toolbox.getConfig().getHadoopWorkingPath(),
    toolbox.getSegmentPusher().getPathForHadoop(),
    hadoopJobIdFile
};
final ClassLoader oldLoader = Thread.currentThread().getContextClassLoader();
Class<?> determinePartitionsRunnerClass = determinePartitionsInnerProcessingRunner.getClass();
Method determinePartitionsInnerProcessingRunTask = determinePartitionsRunnerClass.getMethod("runTask", determinePartitionsInput.getClass());
try {
Thread.currentThread().setContextClassLoader(loader);
ingestionState = IngestionState.DETERMINE_PARTITIONS;
final String determineConfigStatusString = (String) determinePartitionsInnerProcessingRunTask.invoke(
    determinePartitionsInnerProcessingRunner,
    new Object[] { determinePartitionsInput });
determineConfigStatus = toolbox.getJsonMapper().readValue(determineConfigStatusString, HadoopDetermineConfigInnerProcessingStatus.class);
indexerSchema = determineConfigStatus.getSchema();
if (indexerSchema == null) {
errorMsg = determineConfigStatus.getErrorMsg();
toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
return TaskStatus.failure(getId(), errorMsg);
}
} catch (Exception e) {
throw new RuntimeException(e);
} finally {
Thread.currentThread().setContextClassLoader(oldLoader);
}
// We should have a lock from before we started running only if interval was specified
String version;
if (determineIntervals) {
Interval interval = JodaUtils.umbrellaInterval(JodaUtils.condenseIntervals(indexerSchema.getDataSchema().getGranularitySpec().sortedBucketIntervals()));
final long lockTimeoutMs = getContextValue(Tasks.LOCK_TIMEOUT_KEY, Tasks.DEFAULT_LOCK_TIMEOUT_MILLIS);
// Note: if lockTimeoutMs is larger than ServerConfig.maxIdleTime, the line below can hit an HTTP timeout.
final TaskLock lock = Preconditions.checkNotNull(
    toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, interval, lockTimeoutMs)),
    "Cannot acquire a lock for interval[%s]",
    interval);
if (lock.isRevoked()) {
throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", interval));
}
version = lock.getVersion();
} else {
Iterable<TaskLock> locks = getTaskLocks(toolbox.getTaskActionClient());
final TaskLock myLock = Iterables.getOnlyElement(locks);
version = myLock.getVersion();
}
final String specVersion = indexerSchema.getTuningConfig().getVersion();
if (indexerSchema.getTuningConfig().isUseExplicitVersion()) {
if (specVersion.compareTo(version) < 0) {
version = specVersion;
} else {
String errMsg = StringUtils.format(
    "Spec version can not be greater than or equal to the lock version, Spec version: [%s] Lock version: [%s].",
    specVersion,
    version);
log.error(errMsg);
toolbox.getTaskReportFileWriter().write(getId(), null);
return TaskStatus.failure(getId(), errMsg);
}
}
log.info("Setting version to: %s", version);
Object innerProcessingRunner = getForeignClassloaderObject(
    "org.apache.druid.indexing.common.task.HadoopIndexTask$HadoopIndexGeneratorInnerProcessingRunner",
    loader);
buildSegmentsStatsGetter = new InnerProcessingStatsGetter(innerProcessingRunner);
String[] buildSegmentsInput = new String[] {
    toolbox.getJsonMapper().writeValueAsString(indexerSchema),
    version,
    hadoopJobIdFile
};
Class<?> buildSegmentsRunnerClass = innerProcessingRunner.getClass();
Method innerProcessingRunTask = buildSegmentsRunnerClass.getMethod("runTask", buildSegmentsInput.getClass());
try {
Thread.currentThread().setContextClassLoader(loader);
ingestionState = IngestionState.BUILD_SEGMENTS;
indexGeneratorJobAttempted = true;
final String jobStatusString = (String) innerProcessingRunTask.invoke(innerProcessingRunner, new Object[] { buildSegmentsInput });
buildSegmentsStatus = toolbox.getJsonMapper().readValue(jobStatusString, HadoopIndexGeneratorInnerProcessingStatus.class);
List<DataSegmentAndIndexZipFilePath> dataSegmentAndIndexZipFilePaths = buildSegmentsStatus.getDataSegmentAndIndexZipFilePaths();
if (dataSegmentAndIndexZipFilePaths != null) {
indexGeneratorJobSuccess = true;
renameSegmentIndexFilesJob(
    toolbox.getJsonMapper().writeValueAsString(indexerSchema),
    toolbox.getJsonMapper().writeValueAsString(dataSegmentAndIndexZipFilePaths));
ArrayList<DataSegment> segments = new ArrayList<>(
    dataSegmentAndIndexZipFilePaths.stream()
        .map(DataSegmentAndIndexZipFilePath::getSegment)
        .collect(Collectors.toList()));
toolbox.publishSegments(segments);
// Wait for the published segments to become available for querying, if awaitSegmentAvailabilityTimeoutMillis is set.
if (spec.getTuningConfig().getAwaitSegmentAvailabilityTimeoutMillis() > 0) {
ingestionState = IngestionState.SEGMENT_AVAILABILITY_WAIT;
waitForSegmentAvailability(toolbox, segments, spec.getTuningConfig().getAwaitSegmentAvailabilityTimeoutMillis());
}
ingestionState = IngestionState.COMPLETED;
toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
return TaskStatus.success(getId());
} else {
errorMsg = buildSegmentsStatus.getErrorMsg();
toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
return TaskStatus.failure(getId(), errorMsg);
}
} catch (Exception e) {
throw new RuntimeException(e);
} finally {
Thread.currentThread().setContextClassLoader(oldLoader);
}
} finally {
indexerGeneratorCleanupJob(
    indexGeneratorJobAttempted,
    indexGeneratorJobSuccess,
    indexerSchema == null ? null : toolbox.getJsonMapper().writeValueAsString(indexerSchema));
}
}
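The two branches above choose the segment version differently depending on whether intervals were known up front. A condensed sketch of that decision, using only the calls visible in the method; the helper name is hypothetical.
// Hypothetical helper condensing the version-selection logic above.
private String resolveLockVersion(TaskToolbox toolbox, boolean determineIntervals, Interval interval) throws Exception {
  if (determineIntervals) {
    // Intervals were computed during the job, so the lock must be acquired now.
    final long lockTimeoutMs = getContextValue(Tasks.LOCK_TIMEOUT_KEY, Tasks.DEFAULT_LOCK_TIMEOUT_MILLIS);
    final TaskLock lock = Preconditions.checkNotNull(
        toolbox.getTaskActionClient().submit(
            new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, interval, lockTimeoutMs)),
        "Cannot acquire a lock for interval[%s]", interval);
    if (lock.isRevoked()) {
      throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", interval));
    }
    return lock.getVersion();
  } else {
    // Intervals were specified in the spec, so a lock was already acquired before the task started running.
    return Iterables.getOnlyElement(getTaskLocks(toolbox.getTaskActionClient())).getVersion();
  }
}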
Use of org.apache.druid.indexing.common.TaskLock in project druid by druid-io.
The class RealtimeIndexTask, method run.
@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
runThread = Thread.currentThread();
if (this.plumber != null) {
throw new IllegalStateException("Plumber must be null");
}
setupTimeoutAlert();
boolean normalExit = true;
// It would be nice to get the PlumberSchool in the constructor. Although that will need jackson injectables for
// stuff like the ServerView, which seems kind of odd? Perhaps revisit this when Guice has been introduced.
final SegmentPublisher segmentPublisher = new TaskActionSegmentPublisher(toolbox);
// NOTE: We talk to the coordinator in various places in the plumber and we could be more robust to issues
// with the coordinator. Right now, we'll block/throw in whatever thread triggered the coordinator behavior,
// which will typically be either the main data processing loop or the persist thread.
// Wrap default DataSegmentAnnouncer such that we unlock intervals as we unannounce segments
final long lockTimeoutMs = getContextValue(Tasks.LOCK_TIMEOUT_KEY, Tasks.DEFAULT_LOCK_TIMEOUT_MILLIS);
// Note: if lockTimeoutMs is larger than ServerConfig.maxIdleTime, an HTTP timeout can occur while waiting for a lock to be acquired.
final DataSegmentAnnouncer lockingSegmentAnnouncer = new DataSegmentAnnouncer() {
@Override
public void announceSegment(final DataSegment segment) throws IOException {
// Side effect: Calling announceSegment causes a lock to be acquired
final TaskLock lock = Preconditions.checkNotNull(
    toolbox.getTaskActionClient().submit(
        new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segment.getInterval(), lockTimeoutMs)),
    "Cannot acquire a lock for interval[%s]",
    segment.getInterval());
if (lock.isRevoked()) {
throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segment.getInterval()));
}
toolbox.getSegmentAnnouncer().announceSegment(segment);
}
@Override
public void unannounceSegment(final DataSegment segment) throws IOException {
try {
toolbox.getSegmentAnnouncer().unannounceSegment(segment);
} finally {
toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
}
}
@Override
public void announceSegments(Iterable<DataSegment> segments) throws IOException {
// Side effect: Calling announceSegments causes locks to be acquired
for (DataSegment segment : segments) {
final TaskLock lock = Preconditions.checkNotNull(
    toolbox.getTaskActionClient().submit(
        new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segment.getInterval(), lockTimeoutMs)),
    "Cannot acquire a lock for interval[%s]",
    segment.getInterval());
if (lock.isRevoked()) {
throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segment.getInterval()));
}
}
toolbox.getSegmentAnnouncer().announceSegments(segments);
}
@Override
public void unannounceSegments(Iterable<DataSegment> segments) throws IOException {
try {
toolbox.getSegmentAnnouncer().unannounceSegments(segments);
} finally {
for (DataSegment segment : segments) {
toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
}
}
}
};
// NOTE: getVersion will block if there is lock contention, which will block plumber.getSink (and thus the firehose).
// Shouldn't usually happen, since we don't expect people to submit tasks that intersect with the
// realtime window, but if they do it can be problematic. If we decide to care, we can use more threads in
// the plumber such that waiting for the coordinator doesn't block data processing.
final VersioningPolicy versioningPolicy = new VersioningPolicy() {
@Override
public String getVersion(final Interval interval) {
try {
// Side effect: Calling getVersion causes a lock to be acquired
final TimeChunkLockAcquireAction action = new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, interval, lockTimeoutMs);
final TaskLock lock = Preconditions.checkNotNull(
    toolbox.getTaskActionClient().submit(action),
    "Cannot acquire a lock for interval[%s]",
    interval);
if (lock.isRevoked()) {
throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", interval));
}
return lock.getVersion();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
};
DataSchema dataSchema = spec.getDataSchema();
RealtimeIOConfig realtimeIOConfig = spec.getIOConfig();
RealtimeTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(toolbox.getPersistDir()).withVersioningPolicy(versioningPolicy);
final FireDepartment fireDepartment = new FireDepartment(dataSchema, realtimeIOConfig, tuningConfig);
this.metrics = fireDepartment.getMetrics();
final RealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartment);
this.queryRunnerFactoryConglomerate = toolbox.getQueryRunnerFactoryConglomerate();
// NOTE: This pusher selects path based purely on global configuration and the DataSegment, which means
// NOTE: that redundant realtime tasks will upload to the same location. This can cause index.zip
// NOTE: (partitionNum_index.zip for HDFS data storage) to mismatch, or it can cause historical nodes to load
// NOTE: different instances of the "same" segment.
final PlumberSchool plumberSchool = new RealtimePlumberSchool(
    toolbox.getEmitter(),
    toolbox.getQueryRunnerFactoryConglomerate(),
    toolbox.getSegmentPusher(),
    lockingSegmentAnnouncer,
    segmentPublisher,
    toolbox.getSegmentHandoffNotifierFactory(),
    toolbox.getQueryProcessingPool(),
    toolbox.getJoinableFactory(),
    toolbox.getIndexMergerV9(),
    toolbox.getIndexIO(),
    toolbox.getCache(),
    toolbox.getCacheConfig(),
    toolbox.getCachePopulatorStats(),
    toolbox.getJsonMapper());
this.plumber = plumberSchool.findPlumber(dataSchema, tuningConfig, metrics);
final Supplier<Committer> committerSupplier = Committers.nilSupplier();
LookupNodeService lookupNodeService = getContextValue(CTX_KEY_LOOKUP_TIER) == null
    ? toolbox.getLookupNodeService()
    : new LookupNodeService((String) getContextValue(CTX_KEY_LOOKUP_TIER));
DiscoveryDruidNode discoveryDruidNode = new DiscoveryDruidNode(
    toolbox.getDruidNode(),
    NodeRole.PEON,
    ImmutableMap.of(
        toolbox.getDataNodeService().getName(), toolbox.getDataNodeService(),
        lookupNodeService.getName(), lookupNodeService));
try {
toolbox.getDataSegmentServerAnnouncer().announce();
toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
plumber.startJob();
// Set up metrics emission
toolbox.addMonitor(metricsMonitor);
// Delay firehose connection to avoid claiming input resources while the plumber is starting up.
final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
// Skip connecting firehose if we've been stopped before we got started.
synchronized (this) {
if (!gracefullyStopped) {
firehose = firehoseFactory.connect(
    Preconditions.checkNotNull(spec.getDataSchema().getParser(), "inputRowParser"),
    toolbox.getIndexingTmpDir());
}
}
// Time to read data!
while (firehose != null && (!gracefullyStopped || firehoseDrainableByClosing) && firehose.hasMore()) {
Plumbers.addNextRow(committerSupplier, firehose, plumber, tuningConfig.isReportParseExceptions(), metrics);
}
} catch (Throwable e) {
normalExit = false;
log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
throw e;
} finally {
if (normalExit) {
try {
// Persist if we had actually started.
if (firehose != null) {
log.info("Persisting remaining data.");
final Committer committer = committerSupplier.get();
final CountDownLatch persistLatch = new CountDownLatch(1);
plumber.persist(new Committer() {
@Override
public Object getMetadata() {
return committer.getMetadata();
}
@Override
public void run() {
try {
committer.run();
} finally {
persistLatch.countDown();
}
}
});
persistLatch.await();
}
if (gracefullyStopped) {
log.info("Gracefully stopping.");
} else {
log.info("Finishing the job.");
synchronized (this) {
if (gracefullyStopped) {
// Someone called stopGracefully after we checked the flag. That's okay, just stop now.
log.info("Gracefully stopping.");
} else {
finishingJob = true;
}
}
if (finishingJob) {
plumber.finishJob();
}
}
} catch (InterruptedException e) {
log.debug(e, "Interrupted while finishing the job");
} catch (Exception e) {
log.makeAlert(e, "Failed to finish realtime task").emit();
throw e;
} finally {
if (firehose != null) {
CloseableUtils.closeAndSuppressExceptions(firehose, e -> log.warn("Failed to close Firehose"));
}
toolbox.removeMonitor(metricsMonitor);
}
}
toolbox.getDataSegmentServerAnnouncer().unannounce();
toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
}
log.info("Job done!");
return TaskStatus.success(getId());
}
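All three lock-acquiring callbacks above (announceSegment, announceSegments, and getVersion) rely on the same TaskLock idiom: submit a TimeChunkLockAcquireAction, verify the result is non-null, and reject revoked locks. A minimal sketch of that shared idiom, factored into a hypothetical helper:
// Hypothetical helper capturing the repeated acquire-and-verify idiom above.
private TaskLock acquireTimeChunkLock(TaskToolbox toolbox, Interval interval, long lockTimeoutMs) throws IOException {
  final TaskLock lock = Preconditions.checkNotNull(
      toolbox.getTaskActionClient().submit(
          new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, interval, lockTimeoutMs)),
      "Cannot acquire a lock for interval[%s]", interval);
  if (lock.isRevoked()) {
    // A revoked lock means a higher-priority task has taken over the interval.
    throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", interval));
  }
  return lock;
}
The matching release path is the LockReleaseAction submitted in unannounceSegment and unannounceSegments above, so every announced interval is unlocked when its segments are unannounced.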
Use of org.apache.druid.indexing.common.TaskLock in project druid by druid-io.
The class RestoreTask, method run.
@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception {
final TaskLock myLock = getAndCheckLock(toolbox);
// List unused segments
final List<DataSegment> unusedSegments = toolbox.getTaskActionClient().submit(new RetrieveUnusedSegmentsAction(myLock.getDataSource(), myLock.getInterval()));
// Verify none of these segments have versions > lock version
for (final DataSegment unusedSegment : unusedSegments) {
if (unusedSegment.getVersion().compareTo(myLock.getVersion()) > 0) {
throw new ISE("Unused segment[%s] has version[%s] > task version[%s]", unusedSegment.getId(), unusedSegment.getVersion(), myLock.getVersion());
}
log.info("OK to restore segment: %s", unusedSegment.getId());
}
final List<DataSegment> restoredSegments = new ArrayList<>();
// Move segments
for (DataSegment segment : unusedSegments) {
final DataSegment restored = toolbox.getDataSegmentArchiver().restore(segment);
if (restored != null) {
restoredSegments.add(restored);
} else {
log.info("Segment [%s] did not move, not updating metadata", segment.getId());
}
}
if (restoredSegments.isEmpty()) {
log.info("No segments restored");
} else {
// Update metadata for moved segments
toolbox.getTaskActionClient().submit(new SegmentMetadataUpdateAction(ImmutableSet.copyOf(restoredSegments)));
}
return TaskStatus.success(getId());
}