use of io.druid.indexing.common.actions.LockAcquireAction in project druid by druid-io.
the class IndexTaskTest method runTask.
private final List<DataSegment> runTask(final IndexTask indexTask) throws Exception {
final List<DataSegment> segments = Lists.newArrayList();
indexTask.run(new TaskToolbox(null, null, new TaskActionClient() {
@Override
public <RetType> RetType submit(TaskAction<RetType> taskAction) throws IOException {
if (taskAction instanceof LockListAction) {
return (RetType) Arrays.asList(new TaskLock("", "", null, new DateTime().toString()));
}
if (taskAction instanceof LockAcquireAction) {
return (RetType) new TaskLock("groupId", "test", ((LockAcquireAction) taskAction).getInterval(), new DateTime().toString());
}
if (taskAction instanceof SegmentTransactionalInsertAction) {
return (RetType) new SegmentPublishResult(((SegmentTransactionalInsertAction) taskAction).getSegments(), true);
}
if (taskAction instanceof SegmentAllocateAction) {
SegmentAllocateAction action = (SegmentAllocateAction) taskAction;
Interval interval = action.getPreferredSegmentGranularity().bucket(action.getTimestamp());
ShardSpec shardSpec = new NumberedShardSpec(segmentAllocatePartitionCounter++, 0);
return (RetType) new SegmentIdentifier(action.getDataSource(), interval, "latestVersion", shardSpec);
}
return null;
}
}, null, new DataSegmentPusher() {
@Deprecated
@Override
public String getPathForHadoop(String dataSource) {
return getPathForHadoop();
}
@Override
public String getPathForHadoop() {
return null;
}
@Override
public DataSegment push(File file, DataSegment segment) throws IOException {
segments.add(segment);
return segment;
}
}, null, null, null, null, null, null, null, null, null, jsonMapper, temporaryFolder.newFolder(), indexMerger, indexIO, null, null, indexMergerV9));
Collections.sort(segments);
return segments;
}
use of io.druid.indexing.common.actions.LockAcquireAction in project druid by druid-io.
the class HadoopIndexTask method run.
@SuppressWarnings("unchecked")
@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception {
final ClassLoader loader = buildClassLoader(toolbox);
boolean determineIntervals = !spec.getDataSchema().getGranularitySpec().bucketIntervals().isPresent();
spec = HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(spec, jsonMapper, new OverlordActionBasedUsedSegmentLister(toolbox));
final String config = invokeForeignLoader("io.druid.indexing.common.task.HadoopIndexTask$HadoopDetermineConfigInnerProcessing", new String[] { toolbox.getObjectMapper().writeValueAsString(spec), toolbox.getConfig().getHadoopWorkingPath(), toolbox.getSegmentPusher().getPathForHadoop() }, loader);
final HadoopIngestionSpec indexerSchema = toolbox.getObjectMapper().readValue(config, HadoopIngestionSpec.class);
// We should have a lock from before we started running only if interval was specified
String version;
if (determineIntervals) {
Interval interval = JodaUtils.umbrellaInterval(JodaUtils.condenseIntervals(indexerSchema.getDataSchema().getGranularitySpec().bucketIntervals().get()));
TaskLock lock = toolbox.getTaskActionClient().submit(new LockAcquireAction(interval));
version = lock.getVersion();
} else {
Iterable<TaskLock> locks = getTaskLocks(toolbox);
final TaskLock myLock = Iterables.getOnlyElement(locks);
version = myLock.getVersion();
}
final String specVersion = indexerSchema.getTuningConfig().getVersion();
if (indexerSchema.getTuningConfig().isUseExplicitVersion()) {
if (specVersion.compareTo(version) < 0) {
version = specVersion;
} else {
log.error("Spec version can not be greater than or equal to the lock version, Spec version: [%s] Lock version: [%s].", specVersion, version);
return TaskStatus.failure(getId());
}
}
log.info("Setting version to: %s", version);
final String segments = invokeForeignLoader("io.druid.indexing.common.task.HadoopIndexTask$HadoopIndexGeneratorInnerProcessing", new String[] { toolbox.getObjectMapper().writeValueAsString(indexerSchema), version }, loader);
if (segments != null) {
List<DataSegment> publishedSegments = toolbox.getObjectMapper().readValue(segments, new TypeReference<List<DataSegment>>() {
});
toolbox.publishSegments(publishedSegments);
return TaskStatus.success(getId());
} else {
return TaskStatus.failure(getId());
}
}
use of io.druid.indexing.common.actions.LockAcquireAction in project druid by druid-io.
the class IndexTask method run.
@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
final boolean determineIntervals = !ingestionSchema.getDataSchema().getGranularitySpec().bucketIntervals().isPresent();
final FirehoseFactory delegateFirehoseFactory = ingestionSchema.getIOConfig().getFirehoseFactory();
if (delegateFirehoseFactory instanceof IngestSegmentFirehoseFactory) {
// pass toolbox to Firehose
((IngestSegmentFirehoseFactory) delegateFirehoseFactory).setTaskToolbox(toolbox);
}
final FirehoseFactory firehoseFactory;
if (ingestionSchema.getIOConfig().isSkipFirehoseCaching() || delegateFirehoseFactory instanceof ReplayableFirehoseFactory) {
firehoseFactory = delegateFirehoseFactory;
} else {
firehoseFactory = new ReplayableFirehoseFactory(delegateFirehoseFactory, ingestionSchema.getTuningConfig().isReportParseExceptions(), null, null, smileMapper);
}
final Map<Interval, List<ShardSpec>> shardSpecs = determineShardSpecs(toolbox, firehoseFactory);
final String version;
final DataSchema dataSchema;
if (determineIntervals) {
Interval interval = JodaUtils.umbrellaInterval(shardSpecs.keySet());
TaskLock lock = toolbox.getTaskActionClient().submit(new LockAcquireAction(interval));
version = lock.getVersion();
dataSchema = ingestionSchema.getDataSchema().withGranularitySpec(ingestionSchema.getDataSchema().getGranularitySpec().withIntervals(JodaUtils.condenseIntervals(shardSpecs.keySet())));
} else {
version = Iterables.getOnlyElement(getTaskLocks(toolbox)).getVersion();
dataSchema = ingestionSchema.getDataSchema();
}
if (generateAndPublishSegments(toolbox, dataSchema, shardSpecs, version, firehoseFactory)) {
return TaskStatus.success(getId());
} else {
return TaskStatus.failure(getId());
}
}
use of io.druid.indexing.common.actions.LockAcquireAction in project druid by druid-io.
the class RealtimeIndexTask method run.
@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
runThread = Thread.currentThread();
if (this.plumber != null) {
throw new IllegalStateException("WTF?!? run with non-null plumber??!");
}
boolean normalExit = true;
// It would be nice to get the PlumberSchool in the constructor. Although that will need jackson injectables for
// stuff like the ServerView, which seems kind of odd? Perhaps revisit this when Guice has been introduced.
final SegmentPublisher segmentPublisher = new TaskActionSegmentPublisher(this, toolbox);
// NOTE: We talk to the coordinator in various places in the plumber and we could be more robust to issues
// with the coordinator. Right now, we'll block/throw in whatever thread triggered the coordinator behavior,
// which will typically be either the main data processing loop or the persist thread.
// Wrap default DataSegmentAnnouncer such that we unlock intervals as we unannounce segments
final DataSegmentAnnouncer lockingSegmentAnnouncer = new DataSegmentAnnouncer() {
@Override
public void announceSegment(final DataSegment segment) throws IOException {
// Side effect: Calling announceSegment causes a lock to be acquired
toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval()));
toolbox.getSegmentAnnouncer().announceSegment(segment);
}
@Override
public void unannounceSegment(final DataSegment segment) throws IOException {
try {
toolbox.getSegmentAnnouncer().unannounceSegment(segment);
} finally {
toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
}
}
@Override
public void announceSegments(Iterable<DataSegment> segments) throws IOException {
// Side effect: Calling announceSegments causes locks to be acquired
for (DataSegment segment : segments) {
toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval()));
}
toolbox.getSegmentAnnouncer().announceSegments(segments);
}
@Override
public void unannounceSegments(Iterable<DataSegment> segments) throws IOException {
try {
toolbox.getSegmentAnnouncer().unannounceSegments(segments);
} finally {
for (DataSegment segment : segments) {
toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
}
}
}
@Override
public boolean isAnnounced(DataSegment segment) {
return toolbox.getSegmentAnnouncer().isAnnounced(segment);
}
};
// NOTE: getVersion will block if there is lock contention, which will block plumber.getSink
// NOTE: (and thus the firehose)
// Shouldn't usually happen, since we don't expect people to submit tasks that intersect with the
// realtime window, but if they do it can be problematic. If we decide to care, we can use more threads in
// the plumber such that waiting for the coordinator doesn't block data processing.
final VersioningPolicy versioningPolicy = new VersioningPolicy() {
@Override
public String getVersion(final Interval interval) {
try {
// Side effect: Calling getVersion causes a lock to be acquired
final TaskLock myLock = toolbox.getTaskActionClient().submit(new LockAcquireAction(interval));
return myLock.getVersion();
} catch (IOException e) {
throw Throwables.propagate(e);
}
}
};
DataSchema dataSchema = spec.getDataSchema();
RealtimeIOConfig realtimeIOConfig = spec.getIOConfig();
RealtimeTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(new File(toolbox.getTaskWorkDir(), "persist")).withVersioningPolicy(versioningPolicy);
final FireDepartment fireDepartment = new FireDepartment(dataSchema, realtimeIOConfig, tuningConfig);
this.metrics = fireDepartment.getMetrics();
final RealtimeMetricsMonitor metricsMonitor = new RealtimeMetricsMonitor(ImmutableList.of(fireDepartment), ImmutableMap.of(DruidMetrics.TASK_ID, new String[] { getId() }));
this.queryRunnerFactoryConglomerate = toolbox.getQueryRunnerFactoryConglomerate();
// NOTE: This pusher selects path based purely on global configuration and the DataSegment, which means
// NOTE: that redundant realtime tasks will upload to the same location. This can cause index.zip
// NOTE: (partitionNum_index.zip for HDFS data storage) and descriptor.json (partitionNum_descriptor.json for
// NOTE: HDFS data storage) to mismatch, or it can cause historical nodes to load different instances of
// NOTE: the "same" segment.
final PlumberSchool plumberSchool = new RealtimePlumberSchool(toolbox.getEmitter(), toolbox.getQueryRunnerFactoryConglomerate(), toolbox.getSegmentPusher(), lockingSegmentAnnouncer, segmentPublisher, toolbox.getSegmentHandoffNotifierFactory(), toolbox.getQueryExecutorService(), toolbox.getIndexMerger(), toolbox.getIndexMergerV9(), toolbox.getIndexIO(), toolbox.getCache(), toolbox.getCacheConfig(), toolbox.getObjectMapper());
this.plumber = plumberSchool.findPlumber(dataSchema, tuningConfig, metrics);
Supplier<Committer> committerSupplier = null;
try {
plumber.startJob();
// Set up metrics emission
toolbox.getMonitorScheduler().addMonitor(metricsMonitor);
// Delay firehose connection to avoid claiming input resources while the plumber is starting up.
final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
// Skip connecting firehose if we've been stopped before we got started.
synchronized (this) {
if (!gracefullyStopped) {
firehose = firehoseFactory.connect(spec.getDataSchema().getParser());
committerSupplier = Committers.supplierFromFirehose(firehose);
}
}
// Time to read data!
while (firehose != null && (!gracefullyStopped || firehoseDrainableByClosing) && firehose.hasMore()) {
Plumbers.addNextRow(committerSupplier, firehose, plumber, tuningConfig.isReportParseExceptions(), metrics);
}
} catch (Throwable e) {
normalExit = false;
log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
throw e;
} finally {
if (normalExit) {
try {
// Persist if we had actually started.
if (firehose != null) {
log.info("Persisting remaining data.");
final Committer committer = committerSupplier.get();
final CountDownLatch persistLatch = new CountDownLatch(1);
plumber.persist(new Committer() {
@Override
public Object getMetadata() {
return committer.getMetadata();
}
@Override
public void run() {
try {
committer.run();
} finally {
persistLatch.countDown();
}
}
});
persistLatch.await();
}
if (gracefullyStopped) {
log.info("Gracefully stopping.");
} else {
log.info("Finishing the job.");
synchronized (this) {
if (gracefullyStopped) {
// Someone called stopGracefully after we checked the flag. That's okay, just stop now.
log.info("Gracefully stopping.");
} else {
finishingJob = true;
}
}
if (finishingJob) {
plumber.finishJob();
}
}
} catch (InterruptedException e) {
log.debug(e, "Interrupted while finishing the job");
} catch (Exception e) {
log.makeAlert(e, "Failed to finish realtime task").emit();
throw e;
} finally {
if (firehose != null) {
CloseQuietly.close(firehose);
}
toolbox.getMonitorScheduler().removeMonitor(metricsMonitor);
}
}
}
log.info("Job done!");
return TaskStatus.success(getId());
}
use of io.druid.indexing.common.actions.LockAcquireAction in project druid by druid-io.
the class RealtimeishTask method run.
@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception {
final Interval interval1 = new Interval("2010-01-01T00/PT1H");
final Interval interval2 = new Interval("2010-01-01T01/PT1H");
// Sort of similar to what realtime tasks do:
// Acquire lock for first interval
final TaskLock lock1 = toolbox.getTaskActionClient().submit(new LockAcquireAction(interval1));
final List<TaskLock> locks1 = toolbox.getTaskActionClient().submit(new LockListAction());
// (Confirm lock sanity)
Assert.assertEquals("lock1 interval", interval1, lock1.getInterval());
Assert.assertEquals("locks1", ImmutableList.of(lock1), locks1);
// Acquire lock for second interval
final TaskLock lock2 = toolbox.getTaskActionClient().submit(new LockAcquireAction(interval2));
final List<TaskLock> locks2 = toolbox.getTaskActionClient().submit(new LockListAction());
// (Confirm lock sanity)
Assert.assertEquals("lock2 interval", interval2, lock2.getInterval());
Assert.assertEquals("locks2", ImmutableList.of(lock1, lock2), locks2);
// Push first segment
toolbox.getTaskActionClient().submit(new SegmentInsertAction(ImmutableSet.of(DataSegment.builder().dataSource("foo").interval(interval1).version(lock1.getVersion()).build())));
// Release first lock
toolbox.getTaskActionClient().submit(new LockReleaseAction(interval1));
final List<TaskLock> locks3 = toolbox.getTaskActionClient().submit(new LockListAction());
// (Confirm lock sanity)
Assert.assertEquals("locks3", ImmutableList.of(lock2), locks3);
// Push second segment
toolbox.getTaskActionClient().submit(new SegmentInsertAction(ImmutableSet.of(DataSegment.builder().dataSource("foo").interval(interval2).version(lock2.getVersion()).build())));
// Release second lock
toolbox.getTaskActionClient().submit(new LockReleaseAction(interval2));
final List<TaskLock> locks4 = toolbox.getTaskActionClient().submit(new LockListAction());
// (Confirm lock sanity)
Assert.assertEquals("locks4", ImmutableList.<TaskLock>of(), locks4);
// Exit
return TaskStatus.success(getId());
}
Aggregations