use of org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction in project druid by druid-io.
the class RealtimeIndexTask method run.
/**
 * Runs the realtime ingestion for this task: builds a plumber wired to a lock-acquiring segment announcer and
 * versioning policy, connects the firehose, feeds rows into the plumber, and on normal exit persists the remaining
 * data and (unless the task was gracefully stopped) finishes the job.
 *
 * @param toolbox provides the task action client, segment announcers, pusher, monitors and the other services
 *                used below
 * @return {@code TaskStatus.success(getId())} when the method reaches its end without throwing
 * @throws IllegalStateException if {@code plumber} is already set when this method is entered
 * @throws Exception any throwable raised in the main try block is alerted on and rethrown; exceptions (other
 *                   than {@link InterruptedException}) raised while persisting/finishing are alerted on and rethrown
 */
@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
runThread = Thread.currentThread();
// Guard: this method is the one that assigns the plumber (further below), so it must not be set yet.
if (this.plumber != null) {
throw new IllegalStateException("Plumber must be null");
}
setupTimeoutAlert();
// Set to false in the catch block below; the finally block only persists/finishes while this is still true.
boolean normalExit = true;
// It would be nice to get the PlumberSchool in the constructor. Although that will need jackson injectables for
// stuff like the ServerView, which seems kind of odd? Perhaps revisit this when Guice has been introduced.
final SegmentPublisher segmentPublisher = new TaskActionSegmentPublisher(toolbox);
// NOTE: We talk to the coordinator in various places in the plumber and we could be more robust to issues
// with the coordinator. Right now, we'll block/throw in whatever thread triggered the coordinator behavior,
// which will typically be either the main data processing loop or the persist thread.
// Wrap default DataSegmentAnnouncer such that we unlock intervals as we unannounce segments
final long lockTimeoutMs = getContextValue(Tasks.LOCK_TIMEOUT_KEY, Tasks.DEFAULT_LOCK_TIMEOUT_MILLIS);
// Note: if lockTimeoutMs is larger than ServerConfig.maxIdleTime, http timeout error can occur while waiting for a
// lock to be acquired.
final DataSegmentAnnouncer lockingSegmentAnnouncer = new DataSegmentAnnouncer() {
@Override
public void announceSegment(final DataSegment segment) throws IOException {
// Side effect: Calling announceSegment causes a lock to be acquired
// A null result from the action client fails the checkNotNull with the "Cannot acquire a lock" message.
final TaskLock lock = Preconditions.checkNotNull(toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segment.getInterval(), lockTimeoutMs)), "Cannot acquire a lock for interval[%s]", segment.getInterval());
if (lock.isRevoked()) {
throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segment.getInterval()));
}
// Delegate to the toolbox announcer only once a non-revoked lock is held.
toolbox.getSegmentAnnouncer().announceSegment(segment);
}
@Override
public void unannounceSegment(final DataSegment segment) throws IOException {
try {
toolbox.getSegmentAnnouncer().unannounceSegment(segment);
} finally {
// The lock release is submitted even if the unannounce call above throws.
toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
}
}
@Override
public void announceSegments(Iterable<DataSegment> segments) throws IOException {
// Side effect: Calling announceSegments causes locks to be acquired
// All locks are requested (and checked for revocation) before any segment is announced.
for (DataSegment segment : segments) {
final TaskLock lock = Preconditions.checkNotNull(toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segment.getInterval(), lockTimeoutMs)), "Cannot acquire a lock for interval[%s]", segment.getInterval());
if (lock.isRevoked()) {
throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segment.getInterval()));
}
}
toolbox.getSegmentAnnouncer().announceSegments(segments);
}
@Override
public void unannounceSegments(Iterable<DataSegment> segments) throws IOException {
try {
toolbox.getSegmentAnnouncer().unannounceSegments(segments);
} finally {
// Release one lock per segment interval, even if the bulk unannounce above throws.
for (DataSegment segment : segments) {
toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
}
}
}
};
// NOTE: getVersion will block if there is lock contention, which will block plumber.getSink
// NOTE: (and thus the firehose)
// Shouldn't usually happen, since we don't expect people to submit tasks that intersect with the
// realtime window, but if they do it can be problematic. If we decide to care, we can use more threads in
// the plumber such that waiting for the coordinator doesn't block data processing.
final VersioningPolicy versioningPolicy = new VersioningPolicy() {
@Override
public String getVersion(final Interval interval) {
try {
// Side effect: Calling getVersion causes a lock to be acquired
final TimeChunkLockAcquireAction action = new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, interval, lockTimeoutMs);
final TaskLock lock = Preconditions.checkNotNull(toolbox.getTaskActionClient().submit(action), "Cannot acquire a lock for interval[%s]", interval);
if (lock.isRevoked()) {
throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", interval));
}
// The segment version is the version of the lock that was just acquired.
return lock.getVersion();
} catch (IOException e) {
// getVersion declares no checked exceptions here, so the IOException is rethrown unchecked.
throw new RuntimeException(e);
}
}
};
DataSchema dataSchema = spec.getDataSchema();
RealtimeIOConfig realtimeIOConfig = spec.getIOConfig();
// Tuning config is re-derived with the toolbox persist directory and the lock-backed versioning policy above.
RealtimeTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(toolbox.getPersistDir()).withVersioningPolicy(versioningPolicy);
final FireDepartment fireDepartment = new FireDepartment(dataSchema, realtimeIOConfig, tuningConfig);
this.metrics = fireDepartment.getMetrics();
final RealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartment);
this.queryRunnerFactoryConglomerate = toolbox.getQueryRunnerFactoryConglomerate();
// NOTE: This pusher selects path based purely on global configuration and the DataSegment, which means
// NOTE: that redundant realtime tasks will upload to the same location. This can cause index.zip
// NOTE: (partitionNum_index.zip for HDFS data storage) to mismatch, or it can cause historical nodes to load
// NOTE: different instances of the "same" segment.
final PlumberSchool plumberSchool = new RealtimePlumberSchool(toolbox.getEmitter(), toolbox.getQueryRunnerFactoryConglomerate(), toolbox.getSegmentPusher(), lockingSegmentAnnouncer, segmentPublisher, toolbox.getSegmentHandoffNotifierFactory(), toolbox.getQueryProcessingPool(), toolbox.getJoinableFactory(), toolbox.getIndexMergerV9(), toolbox.getIndexIO(), toolbox.getCache(), toolbox.getCacheConfig(), toolbox.getCachePopulatorStats(), toolbox.getJsonMapper());
this.plumber = plumberSchool.findPlumber(dataSchema, tuningConfig, metrics);
final Supplier<Committer> committerSupplier = Committers.nilSupplier();
// Use a LookupNodeService built from the CTX_KEY_LOOKUP_TIER context value when present, else the toolbox's one.
LookupNodeService lookupNodeService = getContextValue(CTX_KEY_LOOKUP_TIER) == null ? toolbox.getLookupNodeService() : new LookupNodeService((String) getContextValue(CTX_KEY_LOOKUP_TIER));
DiscoveryDruidNode discoveryDruidNode = new DiscoveryDruidNode(toolbox.getDruidNode(), NodeRole.PEON, ImmutableMap.of(toolbox.getDataNodeService().getName(), toolbox.getDataNodeService(), lookupNodeService.getName(), lookupNodeService));
try {
toolbox.getDataSegmentServerAnnouncer().announce();
toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
plumber.startJob();
// Set up metrics emission
toolbox.addMonitor(metricsMonitor);
// Delay firehose connection to avoid claiming input resources while the plumber is starting up.
final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
// Skip connecting firehose if we've been stopped before we got started.
// The flag check and the firehose assignment happen under this task's monitor
// (presumably the same monitor the stop path synchronizes on — confirm against stopGracefully).
synchronized (this) {
if (!gracefullyStopped) {
firehose = firehoseFactory.connect(Preconditions.checkNotNull(spec.getDataSchema().getParser(), "inputRowParser"), toolbox.getIndexingTmpDir());
}
}
// Time to read data!
// Keeps reading after a graceful stop only when the firehose is drainable by closing.
while (firehose != null && (!gracefullyStopped || firehoseDrainableByClosing) && firehose.hasMore()) {
Plumbers.addNextRow(committerSupplier, firehose, plumber, tuningConfig.isReportParseExceptions(), metrics);
}
} catch (Throwable e) {
normalExit = false;
log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
throw e;
} finally {
// NOTE(review): on the exceptional path (normalExit == false) this block is skipped entirely, so the
// firehose is not closed and the metrics monitor is not removed here — confirm that is intended.
if (normalExit) {
try {
// Persist if we had actually started.
if (firehose != null) {
log.info("Persisting remaining data.");
final Committer committer = committerSupplier.get();
final CountDownLatch persistLatch = new CountDownLatch(1);
// Wrap the committer so the latch is counted down once its run() has been invoked (even if it throws).
plumber.persist(new Committer() {
@Override
public Object getMetadata() {
return committer.getMetadata();
}
@Override
public void run() {
try {
committer.run();
} finally {
persistLatch.countDown();
}
}
});
// Block until the wrapped committer above has run.
persistLatch.await();
}
if (gracefullyStopped) {
log.info("Gracefully stopping.");
} else {
log.info("Finishing the job.");
// Re-check the stop flag under the monitor before committing to finishJob.
synchronized (this) {
if (gracefullyStopped) {
// Someone called stopGracefully after we checked the flag. That's okay, just stop now.
log.info("Gracefully stopping.");
} else {
finishingJob = true;
}
}
if (finishingJob) {
plumber.finishJob();
}
}
} catch (InterruptedException e) {
// NOTE(review): the interrupt is only logged; it is neither rethrown nor is the thread's interrupt
// status restored, and the method still goes on to return success — confirm this is intended.
log.debug(e, "Interrupted while finishing the job");
} catch (Exception e) {
log.makeAlert(e, "Failed to finish realtime task").emit();
throw e;
} finally {
if (firehose != null) {
CloseableUtils.closeAndSuppressExceptions(firehose, e -> log.warn("Failed to close Firehose"));
}
toolbox.removeMonitor(metricsMonitor);
}
}
// These unannounce calls sit in the outer finally, so they run on both normal and exceptional exits.
toolbox.getDataSegmentServerAnnouncer().unannounce();
toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
}
log.info("Job done!");
return TaskStatus.success(getId());
}
use of org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction in project druid by druid-io.
the class HadoopIndexTask method runInternal.
/**
 * Runs the two phases of this Hadoop indexing task: a determine-partitions phase followed by an index-generator
 * phase. Each phase is executed by a runner object obtained through {@code getForeignClassloaderObject} and invoked
 * reflectively (method {@code runTask(String[])}) while the thread's context class loader is switched to the
 * loader returned by {@code buildClassLoader}.
 *
 * @param toolbox provides the JSON mapper, task action client, segment pusher, config and report writer used below
 * @return success after segments are published (and, if configured, after waiting for segment availability);
 *         failure if the determine-config phase yields no schema, if an explicit spec version is not strictly
 *         less than the lock version, or if the index generator returns no segment paths
 * @throws Exception exceptions raised while invoking either runner are wrapped in {@link RuntimeException};
 *                   other exceptions from setup or cleanup propagate as-is
 */
@SuppressWarnings("unchecked")
private TaskStatus runInternal(TaskToolbox toolbox) throws Exception {
// These two flags and indexerSchema are read by the cleanup call in the outermost finally block.
boolean indexGeneratorJobAttempted = false;
boolean indexGeneratorJobSuccess = false;
HadoopIngestionSpec indexerSchema = null;
try {
// Registers a callback that calls killHadoopJob() (by its name, invoked on abnormal exit — confirm).
registerResourceCloserOnAbnormalExit(config -> killHadoopJob());
String hadoopJobIdFile = getHadoopJobIdFileName();
final ClassLoader loader = buildClassLoader(toolbox);
// True when the spec lists no input intervals; in that case a lock is acquired later, after this phase.
boolean determineIntervals = spec.getDataSchema().getGranularitySpec().inputIntervals().isEmpty();
HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(spec, jsonMapper, new OverlordActionBasedUsedSegmentsRetriever(toolbox));
Object determinePartitionsInnerProcessingRunner = getForeignClassloaderObject("org.apache.druid.indexing.common.task.HadoopIndexTask$HadoopDetermineConfigInnerProcessingRunner", loader);
determinePartitionsStatsGetter = new InnerProcessingStatsGetter(determinePartitionsInnerProcessingRunner);
// Arguments are passed to the runner as strings: serialized spec, working path, segment path, job-id file.
String[] determinePartitionsInput = new String[] { toolbox.getJsonMapper().writeValueAsString(spec), toolbox.getConfig().getHadoopWorkingPath(), toolbox.getSegmentPusher().getPathForHadoop(), hadoopJobIdFile };
// Remember the current context class loader so both finally blocks below can restore it.
final ClassLoader oldLoader = Thread.currentThread().getContextClassLoader();
Class<?> determinePartitionsRunnerClass = determinePartitionsInnerProcessingRunner.getClass();
// determinePartitionsInput.getClass() is String[].class, i.e. this looks up runTask(String[]).
Method determinePartitionsInnerProcessingRunTask = determinePartitionsRunnerClass.getMethod("runTask", determinePartitionsInput.getClass());
try {
Thread.currentThread().setContextClassLoader(loader);
ingestionState = IngestionState.DETERMINE_PARTITIONS;
// The String[] is wrapped in an Object[] so it is passed as the single argument of runTask.
final String determineConfigStatusString = (String) determinePartitionsInnerProcessingRunTask.invoke(determinePartitionsInnerProcessingRunner, new Object[] { determinePartitionsInput });
determineConfigStatus = toolbox.getJsonMapper().readValue(determineConfigStatusString, HadoopDetermineConfigInnerProcessingStatus.class);
indexerSchema = determineConfigStatus.getSchema();
// A null schema is treated as failure of the determine-config phase; report it and stop.
if (indexerSchema == null) {
errorMsg = determineConfigStatus.getErrorMsg();
toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
return TaskStatus.failure(getId(), errorMsg);
}
} catch (Exception e) {
throw new RuntimeException(e);
} finally {
Thread.currentThread().setContextClassLoader(oldLoader);
}
// We should have a lock from before we started running only if interval was specified
String version;
if (determineIntervals) {
// Lock a single interval spanning all condensed bucket intervals of the determined schema.
Interval interval = JodaUtils.umbrellaInterval(JodaUtils.condenseIntervals(indexerSchema.getDataSchema().getGranularitySpec().sortedBucketIntervals()));
final long lockTimeoutMs = getContextValue(Tasks.LOCK_TIMEOUT_KEY, Tasks.DEFAULT_LOCK_TIMEOUT_MILLIS);
// Note: if lockTimeoutMs is larger than ServerConfig.maxIdleTime, the below line can incur http timeout error.
final TaskLock lock = Preconditions.checkNotNull(toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, interval, lockTimeoutMs)), "Cannot acquire a lock for interval[%s]", interval);
if (lock.isRevoked()) {
throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", interval));
}
version = lock.getVersion();
} else {
// Intervals were given up front: reuse the version of the task's existing lock.
// getOnlyElement requires exactly one lock to be present.
Iterable<TaskLock> locks = getTaskLocks(toolbox.getTaskActionClient());
final TaskLock myLock = Iterables.getOnlyElement(locks);
version = myLock.getVersion();
}
final String specVersion = indexerSchema.getTuningConfig().getVersion();
if (indexerSchema.getTuningConfig().isUseExplicitVersion()) {
// An explicit spec version is accepted only if it compares strictly less than the lock version
// (String.compareTo ordering).
if (specVersion.compareTo(version) < 0) {
version = specVersion;
} else {
String errMsg = StringUtils.format("Spec version can not be greater than or equal to the lock version, Spec version: [%s] Lock version: [%s].", specVersion, version);
log.error(errMsg);
// NOTE(review): this failure path writes a null report, unlike the other failure paths in this
// method, which write getTaskCompletionReports() — confirm this is intended.
toolbox.getTaskReportFileWriter().write(getId(), null);
return TaskStatus.failure(getId(), errMsg);
}
}
log.info("Setting version to: %s", version);
Object innerProcessingRunner = getForeignClassloaderObject("org.apache.druid.indexing.common.task.HadoopIndexTask$HadoopIndexGeneratorInnerProcessingRunner", loader);
buildSegmentsStatsGetter = new InnerProcessingStatsGetter(innerProcessingRunner);
// Arguments for the index generator: serialized determined schema, chosen version, job-id file.
String[] buildSegmentsInput = new String[] { toolbox.getJsonMapper().writeValueAsString(indexerSchema), version, hadoopJobIdFile };
Class<?> buildSegmentsRunnerClass = innerProcessingRunner.getClass();
Method innerProcessingRunTask = buildSegmentsRunnerClass.getMethod("runTask", buildSegmentsInput.getClass());
try {
Thread.currentThread().setContextClassLoader(loader);
ingestionState = IngestionState.BUILD_SEGMENTS;
// Set before invoking the runner so the cleanup in the outer finally sees that the job was attempted.
indexGeneratorJobAttempted = true;
final String jobStatusString = (String) innerProcessingRunTask.invoke(innerProcessingRunner, new Object[] { buildSegmentsInput });
buildSegmentsStatus = toolbox.getJsonMapper().readValue(jobStatusString, HadoopIndexGeneratorInnerProcessingStatus.class);
List<DataSegmentAndIndexZipFilePath> dataSegmentAndIndexZipFilePaths = buildSegmentsStatus.getDataSegmentAndIndexZipFilePaths();
// A non-null path list is treated as success of the index generator job.
if (dataSegmentAndIndexZipFilePaths != null) {
indexGeneratorJobSuccess = true;
renameSegmentIndexFilesJob(toolbox.getJsonMapper().writeValueAsString(indexerSchema), toolbox.getJsonMapper().writeValueAsString(dataSegmentAndIndexZipFilePaths));
ArrayList<DataSegment> segments = new ArrayList<>(dataSegmentAndIndexZipFilePaths.stream().map(DataSegmentAndIndexZipFilePath::getSegment).collect(Collectors.toList()));
toolbox.publishSegments(segments);
// for awaitSegmentAvailabilityTimeoutMillis
if (spec.getTuningConfig().getAwaitSegmentAvailabilityTimeoutMillis() > 0) {
ingestionState = IngestionState.SEGMENT_AVAILABILITY_WAIT;
waitForSegmentAvailability(toolbox, segments, spec.getTuningConfig().getAwaitSegmentAvailabilityTimeoutMillis());
}
ingestionState = IngestionState.COMPLETED;
toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
return TaskStatus.success(getId());
} else {
errorMsg = buildSegmentsStatus.getErrorMsg();
toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
return TaskStatus.failure(getId(), errorMsg);
}
} catch (Exception e) {
throw new RuntimeException(e);
} finally {
Thread.currentThread().setContextClassLoader(oldLoader);
}
} finally {
// Runs on every exit path (including the early returns above); passes a null schema string when the
// determine-config phase never produced a schema.
indexerGeneratorCleanupJob(indexGeneratorJobAttempted, indexGeneratorJobSuccess, indexerSchema == null ? null : toolbox.getJsonMapper().writeValueAsString(indexerSchema));
}
}
use of org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction in project druid by druid-io.
the class AppenderatorDriverRealtimeIndexTask method run.
/**
 * Runs appenderator-driver-based realtime ingestion: creates the appenderator and driver, starts the driver with a
 * lock-acquiring callback, connects the firehose, adds rows to the driver (publishing whenever a push is required),
 * and finally either publishes and waits for handoff or, when gracefully stopped, persists what was read.
 *
 * @param toolbox provides the task action client, chat handler provider, announcers, monitors and report writer
 * @return {@code TaskStatus.success(getId())} on normal completion; {@code TaskStatus.failure(...)} carrying the
 *         stack trace string if any throwable is raised inside the main try block (it is not rethrown)
 */
@Override
public TaskStatus run(final TaskToolbox toolbox) {
runThread = Thread.currentThread();
authorizerMapper = toolbox.getAuthorizerMapper();
rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
parseExceptionHandler = new ParseExceptionHandler(rowIngestionMeters, spec.getTuningConfig().isLogParseExceptions(), spec.getTuningConfig().getMaxParseExceptions(), spec.getTuningConfig().getMaxSavedParseExceptions());
setupTimeoutAlert();
DataSchema dataSchema = spec.getDataSchema();
RealtimeAppenderatorTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(toolbox.getPersistDir());
// This FireDepartment is built with null IO/tuning configs; below it is used only for metrics.
final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null), null);
final TaskRealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartmentForMetrics, rowIngestionMeters);
this.metrics = fireDepartmentForMetrics.getMetrics();
final Supplier<Committer> committerSupplier = Committers.nilSupplier();
DiscoveryDruidNode discoveryDruidNode = createDiscoveryDruidNode(toolbox);
appenderator = newAppenderator(dataSchema, tuningConfig, metrics, toolbox);
// Lock type for the driver comes from the task context; it defaults to EXCLUSIVE.
TaskLockType lockType = getContextValue(Tasks.USE_SHARED_LOCK, false) ? TaskLockType.SHARED : TaskLockType.EXCLUSIVE;
StreamAppenderatorDriver driver = newDriver(dataSchema, appenderator, toolbox, metrics, lockType);
try {
log.debug("Found chat handler of class[%s]", toolbox.getChatHandlerProvider().getClass().getName());
toolbox.getChatHandlerProvider().register(getId(), this, false);
// Node announcements are made only when the appenderators manager says this task should make them;
// the finally block mirrors this check before unannouncing.
if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
toolbox.getDataSegmentServerAnnouncer().announce();
toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
}
// The callback handed to startJob reports whether a lock could be obtained for the given segment id.
// NOTE(review): both lock actions below hard-code TaskLockType.EXCLUSIVE rather than using the lockType
// computed above — confirm this is intended.
driver.startJob(segmentId -> {
try {
if (lockGranularity == LockGranularity.SEGMENT) {
return toolbox.getTaskActionClient().submit(new SegmentLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), segmentId.getVersion(), segmentId.getShardSpec().getPartitionNum(), 1000L)).isOk();
} else {
final TaskLock lock = toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), 1000L));
// A null lock means "not acquired" (returns false); a revoked lock is an error.
if (lock == null) {
return false;
}
if (lock.isRevoked()) {
throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segmentId.getInterval()));
}
return true;
}
} catch (IOException e) {
throw new RuntimeException(e);
}
});
// Set up metrics emission
toolbox.addMonitor(metricsMonitor);
// Delay firehose connection to avoid claiming input resources while the plumber is starting up.
final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
// A new sequence name is derived from the task id and this counter after every publish in the loop below.
int sequenceNumber = 0;
String sequenceName = makeSequenceName(getId(), sequenceNumber);
// Publisher that rejects any overwrite/drop request and submits the segments as an append action.
final TransactionalSegmentPublisher publisher = (mustBeNullOrEmptyOverwriteSegments, mustBeNullOrEmptyDropSegments, segments, commitMetadata) -> {
if (mustBeNullOrEmptyOverwriteSegments != null && !mustBeNullOrEmptyOverwriteSegments.isEmpty()) {
throw new ISE("Stream ingestion task unexpectedly attempted to overwrite segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyOverwriteSegments));
}
if (mustBeNullOrEmptyDropSegments != null && !mustBeNullOrEmptyDropSegments.isEmpty()) {
throw new ISE("Stream ingestion task unexpectedly attempted to drop segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyDropSegments));
}
final SegmentTransactionalInsertAction action = SegmentTransactionalInsertAction.appendAction(segments, null, null);
return toolbox.getTaskActionClient().submit(action);
};
// Skip connecting firehose if we've been stopped before we got started.
synchronized (this) {
if (!gracefullyStopped) {
firehose = firehoseFactory.connect(Preconditions.checkNotNull(spec.getDataSchema().getParser(), "inputRowParser"), toolbox.getIndexingTmpDir());
}
}
ingestionState = IngestionState.BUILD_SEGMENTS;
// Time to read data!
// NOTE(review): as written, no row is read unless firehoseDrainableByClosing is true, and reading stops as
// soon as gracefullyStopped is set. RealtimeIndexTask uses (!gracefullyStopped || firehoseDrainableByClosing)
// for the analogous loop — confirm the && here is intended.
while (!gracefullyStopped && firehoseDrainableByClosing && firehose.hasMore()) {
try {
InputRow inputRow = firehose.nextRow();
if (inputRow == null) {
log.debug("Discarded null row, considering thrownAway.");
rowIngestionMeters.incrementThrownAway();
} else {
AppenderatorDriverAddResult addResult = driver.add(inputRow, sequenceName, committerSupplier);
if (addResult.isOk()) {
final boolean isPushRequired = addResult.isPushRequired(tuningConfig.getPartitionsSpec().getMaxRowsPerSegment(), tuningConfig.getPartitionsSpec().getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_MAX_TOTAL_ROWS));
if (isPushRequired) {
// Publish under the current sequence name, then move on to a fresh sequence name.
publishSegments(driver, publisher, committerSupplier, sequenceName);
sequenceNumber++;
sequenceName = makeSequenceName(getId(), sequenceNumber);
}
} else {
// If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks.
throw new ISE("Could not allocate segment for row with timestamp[%s]", inputRow.getTimestamp());
}
}
} catch (ParseException e) {
// Parse failures are delegated to handleParseException; the loop continues unless that call throws.
handleParseException(e);
}
}
ingestionState = IngestionState.COMPLETED;
if (!gracefullyStopped) {
// Re-check the stop flag under the monitor before committing to the finishing path.
synchronized (this) {
if (gracefullyStopped) {
// Someone called stopGracefully after we checked the flag. That's okay, just stop now.
log.info("Gracefully stopping.");
} else {
finishingJob = true;
}
}
if (finishingJob) {
log.info("Finishing job...");
// Publish any remaining segments
publishSegments(driver, publisher, committerSupplier, sequenceName);
waitForSegmentPublishAndHandoff(tuningConfig.getPublishAndHandoffTimeout());
}
} else if (firehose != null) {
// Gracefully stopped after the firehose was connected: persist instead of publishing.
log.info("Task was gracefully stopped, will persist data before exiting");
persistAndWait(driver, committerSupplier.get());
}
} catch (Throwable e) {
// Any failure is alerted on, recorded in the task report, and converted into a failure status.
log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
errorMsg = Throwables.getStackTraceAsString(e);
toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
return TaskStatus.failure(getId(), errorMsg);
} finally {
toolbox.getChatHandlerProvider().unregister(getId());
CloseableUtils.closeAndSuppressExceptions(firehose, e -> log.warn("Failed to close Firehose"));
// NOTE(review): appenderator.close() is not guarded; if it throws, the cleanup lines after it are skipped.
appenderator.close();
CloseableUtils.closeAndSuppressExceptions(driver, e -> log.warn("Failed to close AppenderatorDriver"));
toolbox.removeMonitor(metricsMonitor);
if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
toolbox.getDataSegmentServerAnnouncer().unannounce();
toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
}
}
log.info("Job done!");
toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
return TaskStatus.success(getId());
}
use of org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction in project druid by druid-io.
the class RealtimeishTask method run.
/**
 * Walks through a realtime-task-like lock lifecycle against the task action client and asserts the set of held
 * locks after every step: acquire two exclusive time-chunk locks on consecutive one-hour intervals, then for each
 * interval in turn insert a segment versioned by its lock and release that lock.
 *
 * @param toolbox supplies the task action client every action is submitted to
 * @return {@code TaskStatus.success(getId())} once all assertions have passed
 * @throws Exception if submitting any action fails
 */
@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception {
  final Interval firstHour = Intervals.of("2010-01-01T00/PT1H");
  final Interval secondHour = Intervals.of("2010-01-01T01/PT1H");

  // Roughly what a realtime task does.
  // Step 1: lock the first hour; it must be the only lock held.
  final TaskLock firstLock = toolbox.getTaskActionClient().submit(
      new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, firstHour, 5000)
  );
  Assert.assertNotNull(firstLock);
  final List<TaskLock> heldAfterFirstAcquire = toolbox.getTaskActionClient().submit(new LockListAction());
  Assert.assertEquals("lock1 interval", firstHour, firstLock.getInterval());
  Assert.assertEquals("locks1", ImmutableList.of(firstLock), heldAfterFirstAcquire);

  // Step 2: lock the second hour; both locks must now be listed, in acquisition order.
  final TaskLock secondLock = toolbox.getTaskActionClient().submit(
      new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, secondHour, 5000)
  );
  Assert.assertNotNull(secondLock);
  final List<TaskLock> heldAfterSecondAcquire = toolbox.getTaskActionClient().submit(new LockListAction());
  Assert.assertEquals("lock2 interval", secondHour, secondLock.getInterval());
  Assert.assertEquals("locks2", ImmutableList.of(firstLock, secondLock), heldAfterSecondAcquire);

  // Step 3: insert a segment for the first hour, versioned by the first lock.
  final DataSegment firstSegment = DataSegment.builder()
                                              .dataSource("foo")
                                              .interval(firstHour)
                                              .version(firstLock.getVersion())
                                              .size(0)
                                              .build();
  final SegmentInsertAction insertFirst = new SegmentInsertAction(ImmutableSet.of(firstSegment));
  toolbox.getTaskActionClient().submit(insertFirst);

  // Step 4: release the first lock; only the second one may remain.
  toolbox.getTaskActionClient().submit(new LockReleaseAction(firstHour));
  final List<TaskLock> heldAfterFirstRelease = toolbox.getTaskActionClient().submit(new LockListAction());
  Assert.assertEquals("locks3", ImmutableList.of(secondLock), heldAfterFirstRelease);

  // Step 5: insert a segment for the second hour, versioned by the second lock.
  final DataSegment secondSegment = DataSegment.builder()
                                               .dataSource("foo")
                                               .interval(secondHour)
                                               .version(secondLock.getVersion())
                                               .size(0)
                                               .build();
  final SegmentInsertAction insertSecond = new SegmentInsertAction(ImmutableSet.of(secondSegment));
  toolbox.getTaskActionClient().submit(insertSecond);

  // Step 6: release the second lock; no locks may remain.
  toolbox.getTaskActionClient().submit(new LockReleaseAction(secondHour));
  final List<TaskLock> heldAfterSecondRelease = toolbox.getTaskActionClient().submit(new LockListAction());
  Assert.assertEquals("locks4", ImmutableList.<TaskLock>of(), heldAfterSecondRelease);

  // Done.
  return TaskStatus.success(getId());
}
use of org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction in project druid by druid-io.
the class SeekableStreamIndexTaskRunner method runInternal.
private TaskStatus runInternal(TaskToolbox toolbox) throws Exception {
startTime = DateTimes.nowUtc();
status = Status.STARTING;
setToolbox(toolbox);
authorizerMapper = toolbox.getAuthorizerMapper();
rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
parseExceptionHandler = new ParseExceptionHandler(rowIngestionMeters, tuningConfig.isLogParseExceptions(), tuningConfig.getMaxParseExceptions(), tuningConfig.getMaxSavedParseExceptions());
// Now we can initialize StreamChunkReader with the given toolbox.
final StreamChunkParser parser = new StreamChunkParser<RecordType>(this.parser, inputFormat, inputRowSchema, task.getDataSchema().getTransformSpec(), toolbox.getIndexingTmpDir(), row -> row != null && task.withinMinMaxRecordTime(row), rowIngestionMeters, parseExceptionHandler);
initializeSequences();
log.debug("Found chat handler of class[%s]", toolbox.getChatHandlerProvider().getClass().getName());
toolbox.getChatHandlerProvider().register(task.getId(), this, false);
runThread = Thread.currentThread();
// Set up FireDepartmentMetrics
final FireDepartment fireDepartmentForMetrics = new FireDepartment(task.getDataSchema(), new RealtimeIOConfig(null, null), null);
this.fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();
toolbox.addMonitor(TaskRealtimeMetricsMonitorBuilder.build(task, fireDepartmentForMetrics, rowIngestionMeters));
final String lookupTier = task.getContextValue(RealtimeIndexTask.CTX_KEY_LOOKUP_TIER);
final LookupNodeService lookupNodeService = lookupTier == null ? toolbox.getLookupNodeService() : new LookupNodeService(lookupTier);
final DiscoveryDruidNode discoveryDruidNode = new DiscoveryDruidNode(toolbox.getDruidNode(), NodeRole.PEON, ImmutableMap.of(toolbox.getDataNodeService().getName(), toolbox.getDataNodeService(), lookupNodeService.getName(), lookupNodeService));
Throwable caughtExceptionOuter = null;
// milliseconds waited for created segments to be handed off
long handoffWaitMs = 0L;
try (final RecordSupplier<PartitionIdType, SequenceOffsetType, RecordType> recordSupplier = task.newTaskRecordSupplier()) {
if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
toolbox.getDataSegmentServerAnnouncer().announce();
toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
}
appenderator = task.newAppenderator(toolbox, fireDepartmentMetrics, rowIngestionMeters, parseExceptionHandler);
driver = task.newDriver(appenderator, toolbox, fireDepartmentMetrics);
// Start up, set up initial sequences.
final Object restoredMetadata = driver.startJob(segmentId -> {
try {
if (lockGranularityToUse == LockGranularity.SEGMENT) {
return toolbox.getTaskActionClient().submit(new SegmentLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), segmentId.getVersion(), segmentId.getShardSpec().getPartitionNum(), 1000L)).isOk();
} else {
final TaskLock lock = toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), 1000L));
if (lock == null) {
return false;
}
if (lock.isRevoked()) {
throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segmentId.getInterval()));
}
return true;
}
} catch (IOException e) {
throw new RuntimeException(e);
}
});
if (restoredMetadata == null) {
// no persist has happened so far
// so either this is a brand new task or replacement of a failed task
Preconditions.checkState(sequences.get(0).startOffsets.entrySet().stream().allMatch(partitionOffsetEntry -> createSequenceNumber(partitionOffsetEntry.getValue()).compareTo(createSequenceNumber(ioConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(partitionOffsetEntry.getKey()))) >= 0), "Sequence sequences are not compatible with start sequences of task");
currOffsets.putAll(sequences.get(0).startOffsets);
} else {
@SuppressWarnings("unchecked") final Map<String, Object> restoredMetadataMap = (Map) restoredMetadata;
final SeekableStreamEndSequenceNumbers<PartitionIdType, SequenceOffsetType> restoredNextPartitions = deserializePartitionsFromMetadata(toolbox.getJsonMapper(), restoredMetadataMap.get(METADATA_NEXT_PARTITIONS));
currOffsets.putAll(restoredNextPartitions.getPartitionSequenceNumberMap());
// Sanity checks.
if (!restoredNextPartitions.getStream().equals(ioConfig.getStartSequenceNumbers().getStream())) {
throw new ISE("Restored stream[%s] but expected stream[%s]", restoredNextPartitions.getStream(), ioConfig.getStartSequenceNumbers().getStream());
}
if (!currOffsets.keySet().equals(ioConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().keySet())) {
throw new ISE("Restored partitions[%s] but expected partitions[%s]", currOffsets.keySet(), ioConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().keySet());
}
// which is super rare
if (sequences.size() == 0 || getLastSequenceMetadata().isCheckpointed()) {
this.endOffsets.putAll(sequences.size() == 0 ? currOffsets : getLastSequenceMetadata().getEndOffsets());
}
}
log.info("Initialized sequences: %s", sequences.stream().map(SequenceMetadata::toString).collect(Collectors.joining(", ")));
// Filter out partitions with END_OF_SHARD markers since these partitions have already been fully read. This
// should have been done by the supervisor already so this is defensive.
int numPreFilterPartitions = currOffsets.size();
if (currOffsets.entrySet().removeIf(x -> isEndOfShard(x.getValue()))) {
log.info("Removed [%d] partitions from assignment which have already been closed.", numPreFilterPartitions - currOffsets.size());
}
// When end offsets are exclusive, we never skip the start record.
if (!isEndOffsetExclusive()) {
for (Map.Entry<PartitionIdType, SequenceOffsetType> entry : currOffsets.entrySet()) {
final boolean isAtStart = entry.getValue().equals(ioConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(entry.getKey()));
if (!isAtStart || ioConfig.getStartSequenceNumbers().getExclusivePartitions().contains(entry.getKey())) {
lastReadOffsets.put(entry.getKey(), entry.getValue());
}
}
}
// Set up committer.
final Supplier<Committer> committerSupplier = () -> {
final Map<PartitionIdType, SequenceOffsetType> snapshot = ImmutableMap.copyOf(currOffsets);
lastPersistedOffsets.clear();
lastPersistedOffsets.putAll(snapshot);
return new Committer() {
@Override
public Object getMetadata() {
return ImmutableMap.of(METADATA_NEXT_PARTITIONS, new SeekableStreamEndSequenceNumbers<>(stream, snapshot));
}
@Override
public void run() {
// Do nothing.
}
};
};
// restart publishing of sequences (if any)
maybePersistAndPublishSequences(committerSupplier);
Set<StreamPartition<PartitionIdType>> assignment = assignPartitions(recordSupplier);
possiblyResetDataSourceMetadata(toolbox, recordSupplier, assignment);
seekToStartingSequence(recordSupplier, assignment);
ingestionState = IngestionState.BUILD_SEGMENTS;
// Main loop.
// Could eventually support leader/follower mode (for keeping replicas more in sync)
boolean stillReading = !assignment.isEmpty();
status = Status.READING;
Throwable caughtExceptionInner = null;
try {
while (stillReading) {
if (possiblyPause()) {
// The partition assignments may have changed while paused by a call to setEndOffsets() so reassign
// partitions upon resuming. Don't call "seekToStartingSequence" after "assignPartitions", because there's
// no need to re-seek here. All we're going to be doing is dropping partitions.
assignment = assignPartitions(recordSupplier);
possiblyResetDataSourceMetadata(toolbox, recordSupplier, assignment);
if (assignment.isEmpty()) {
log.debug("All partitions have been fully read.");
publishOnStop.set(true);
stopRequested.set(true);
}
}
// if stop is requested or task's end sequence is set by call to setEndOffsets method with finish set to true
if (stopRequested.get() || sequences.size() == 0 || getLastSequenceMetadata().isCheckpointed()) {
status = Status.PUBLISHING;
}
if (stopRequested.get()) {
break;
}
if (backgroundThreadException != null) {
throw new RuntimeException(backgroundThreadException);
}
checkPublishAndHandoffFailure();
maybePersistAndPublishSequences(committerSupplier);
// calling getRecord() ensures that exceptions specific to kafka/kinesis like OffsetOutOfRangeException
// are handled in the subclasses.
List<OrderedPartitionableRecord<PartitionIdType, SequenceOffsetType, RecordType>> records = getRecords(recordSupplier, toolbox);
// note: getRecords() also updates assignment
stillReading = !assignment.isEmpty();
SequenceMetadata<PartitionIdType, SequenceOffsetType> sequenceToCheckpoint = null;
for (OrderedPartitionableRecord<PartitionIdType, SequenceOffsetType, RecordType> record : records) {
final boolean shouldProcess = verifyRecordInRange(record.getPartitionId(), record.getSequenceNumber());
log.trace("Got stream[%s] partition[%s] sequenceNumber[%s], shouldProcess[%s].", record.getStream(), record.getPartitionId(), record.getSequenceNumber(), shouldProcess);
if (shouldProcess) {
final List<InputRow> rows = parser.parse(record.getData(), isEndOfShard(record.getSequenceNumber()));
boolean isPersistRequired = false;
final SequenceMetadata<PartitionIdType, SequenceOffsetType> sequenceToUse = sequences.stream().filter(sequenceMetadata -> sequenceMetadata.canHandle(this, record)).findFirst().orElse(null);
if (sequenceToUse == null) {
throw new ISE("Cannot find any valid sequence for record with partition [%s] and sequenceNumber [%s]. Current sequences: %s", record.getPartitionId(), record.getSequenceNumber(), sequences);
}
for (InputRow row : rows) {
final AppenderatorDriverAddResult addResult = driver.add(row, sequenceToUse.getSequenceName(), committerSupplier, true, // of rows are indexed
false);
if (addResult.isOk()) {
// If the number of rows in the segment exceeds the threshold after adding a row,
// move the segment out from the active segments of BaseAppenderatorDriver to make a new segment.
final boolean isPushRequired = addResult.isPushRequired(tuningConfig.getPartitionsSpec().getMaxRowsPerSegment(), tuningConfig.getPartitionsSpec().getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_MAX_TOTAL_ROWS));
if (isPushRequired && !sequenceToUse.isCheckpointed()) {
sequenceToCheckpoint = sequenceToUse;
}
isPersistRequired |= addResult.isPersistRequired();
} else {
// If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks.
throw new ISE("Could not allocate segment for row with timestamp[%s]", row.getTimestamp());
}
}
if (isPersistRequired) {
Futures.addCallback(driver.persistAsync(committerSupplier.get()), new FutureCallback<Object>() {
@Override
public void onSuccess(@Nullable Object result) {
// Callback for a successfully completed asynchronous persist. Nothing needs to be updated here;
// the value handed back by the persist future is only recorded at debug level.
log.debug("Persist completed with metadata: %s", result);
}
@Override
public void onFailure(Throwable t) {
  // Callback for a failed asynchronous persist.
  // Pass the throwable to the logger so the cause and its stack trace are recorded right where the
  // failure is observed, instead of only a bare message.
  log.error(t, "Persist failed, dying");
  // Stash the failure; the reading loop checks backgroundThreadException and rethrows it wrapped in
  // a RuntimeException, which is what actually stops the task.
  backgroundThreadException = t;
}
});
}
// in kafka, we can easily get the next offset by adding 1, but for kinesis, there's no way
// to get the next sequence number without having to make an expensive api call. So the behavior
// here for kafka is to +1 while for kinesis we simply save the current sequence number
lastReadOffsets.put(record.getPartitionId(), record.getSequenceNumber());
currOffsets.put(record.getPartitionId(), getNextStartOffset(record.getSequenceNumber()));
}
// Use record.getSequenceNumber() in the moreToRead check, since currOffsets might not have been
// updated if we were skipping records for being beyond the end.
final boolean moreToReadAfterThisRecord = isMoreToReadAfterReadingRecord(record.getSequenceNumber(), endOffsets.get(record.getPartitionId()));
if (!moreToReadAfterThisRecord && assignment.remove(record.getStreamPartition())) {
log.info("Finished reading stream[%s], partition[%s].", record.getStream(), record.getPartitionId());
recordSupplier.assign(assignment);
stillReading = !assignment.isEmpty();
}
}
if (!stillReading) {
// We let the fireDepartmentMetrics know that all messages have been read. This way, some metrics such as
// high message gap need not be reported
fireDepartmentMetrics.markProcessingDone();
}
if (System.currentTimeMillis() > nextCheckpointTime) {
sequenceToCheckpoint = getLastSequenceMetadata();
}
if (sequenceToCheckpoint != null && stillReading) {
Preconditions.checkArgument(getLastSequenceMetadata().getSequenceName().equals(sequenceToCheckpoint.getSequenceName()), "Cannot checkpoint a sequence [%s] which is not the latest one, sequences %s", sequenceToCheckpoint, sequences);
requestPause();
final CheckPointDataSourceMetadataAction checkpointAction = new CheckPointDataSourceMetadataAction(task.getDataSource(), ioConfig.getTaskGroupId(), null, createDataSourceMetadata(new SeekableStreamStartSequenceNumbers<>(stream, sequenceToCheckpoint.getStartOffsets(), sequenceToCheckpoint.getExclusiveStartPartitions())));
if (!toolbox.getTaskActionClient().submit(checkpointAction)) {
throw new ISE("Checkpoint request with sequences [%s] failed, dying", currOffsets);
}
}
}
ingestionState = IngestionState.COMPLETED;
} catch (Exception e) {
// (1) catch all exceptions while reading from kafka
caughtExceptionInner = e;
log.error(e, "Encountered exception in run() before persisting.");
throw e;
} finally {
try {
// persist pending data
driver.persist(committerSupplier.get());
} catch (Exception e) {
if (caughtExceptionInner != null) {
caughtExceptionInner.addSuppressed(e);
} else {
throw e;
}
}
}
synchronized (statusLock) {
if (stopRequested.get() && !publishOnStop.get()) {
throw new InterruptedException("Stopping without publishing");
}
status = Status.PUBLISHING;
}
// We need to copy sequences here, because the success callback in publishAndRegisterHandoff removes items from
// the sequence list. If a publish finishes before we finish iterating through the sequence list, we can
// end up skipping some sequences.
List<SequenceMetadata<PartitionIdType, SequenceOffsetType>> sequencesSnapshot = new ArrayList<>(sequences);
for (int i = 0; i < sequencesSnapshot.size(); i++) {
final SequenceMetadata<PartitionIdType, SequenceOffsetType> sequenceMetadata = sequencesSnapshot.get(i);
if (!publishingSequences.contains(sequenceMetadata.getSequenceName())) {
final boolean isLast = i == (sequencesSnapshot.size() - 1);
if (isLast) {
// Shorten endOffsets of the last sequence to match currOffsets.
sequenceMetadata.setEndOffsets(currOffsets);
}
// Update assignments of the sequence, which should clear them. (This will be checked later, when the
// Committer is built.)
sequenceMetadata.updateAssignments(currOffsets, this::isMoreToReadAfterReadingRecord);
publishingSequences.add(sequenceMetadata.getSequenceName());
// persist already done in finally, so directly add to publishQueue
publishAndRegisterHandoff(sequenceMetadata);
}
}
if (backgroundThreadException != null) {
throw new RuntimeException(backgroundThreadException);
}
// Wait for publish futures to complete.
Futures.allAsList(publishWaitList).get();
// Wait for handoff futures to complete.
// Note that every publishing task (created by calling AppenderatorDriver.publish()) has a corresponding
// handoffFuture. handoffFuture can throw an exception if 1) the corresponding publishFuture failed or 2) it
// failed to persist sequences. It might also return null if handoff failed, but was recoverable.
// See publishAndRegisterHandoff() for details.
List<SegmentsAndCommitMetadata> handedOffList = Collections.emptyList();
if (tuningConfig.getHandoffConditionTimeout() == 0) {
handedOffList = Futures.allAsList(handOffWaitList).get();
} else {
final long start = System.nanoTime();
try {
handedOffList = Futures.allAsList(handOffWaitList).get(tuningConfig.getHandoffConditionTimeout(), TimeUnit.MILLISECONDS);
} catch (TimeoutException e) {
// Handoff timeout is not an indexing failure, but coordination failure. We simply ignore timeout exception
// here.
log.makeAlert("Timeout waiting for handoff").addData("taskId", task.getId()).addData("handoffConditionTimeout", tuningConfig.getHandoffConditionTimeout()).emit();
} finally {
handoffWaitMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
}
}
for (SegmentsAndCommitMetadata handedOff : handedOffList) {
log.info("Handoff complete for segments: %s", String.join(", ", Lists.transform(handedOff.getSegments(), DataSegment::toString)));
}
appenderator.close();
} catch (InterruptedException | RejectedExecutionException e) {
// (2) catch InterruptedException and RejectedExecutionException thrown for the whole ingestion steps including
// the final publishing.
caughtExceptionOuter = e;
try {
Futures.allAsList(publishWaitList).cancel(true);
Futures.allAsList(handOffWaitList).cancel(true);
if (appenderator != null) {
appenderator.closeNow();
}
} catch (Exception e2) {
e.addSuppressed(e2);
}
// handle the InterruptedException that gets wrapped in a RejectedExecutionException
if (e instanceof RejectedExecutionException && (e.getCause() == null || !(e.getCause() instanceof InterruptedException))) {
throw e;
}
// if we were interrupted because we were asked to stop, handle the exception and return success, else rethrow
if (!stopRequested.get()) {
Thread.currentThread().interrupt();
throw e;
}
} catch (Exception e) {
// (3) catch all other exceptions thrown for the whole ingestion steps including the final publishing.
caughtExceptionOuter = e;
try {
Futures.allAsList(publishWaitList).cancel(true);
Futures.allAsList(handOffWaitList).cancel(true);
if (appenderator != null) {
appenderator.closeNow();
}
} catch (Exception e2) {
e.addSuppressed(e2);
}
throw e;
} finally {
try {
if (driver != null) {
driver.close();
}
toolbox.getChatHandlerProvider().unregister(task.getId());
if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
toolbox.getDataSegmentServerAnnouncer().unannounce();
}
} catch (Throwable e) {
if (caughtExceptionOuter != null) {
caughtExceptionOuter.addSuppressed(e);
} else {
throw e;
}
}
}
toolbox.getTaskReportFileWriter().write(task.getId(), getTaskCompletionReports(null, handoffWaitMs));
return TaskStatus.success(task.getId());
}
Aggregations