Use of io.druid.indexing.common.TaskLock in project druid by druid-io.
Class HadoopIndexTask, method run.
/**
 * Runs the two foreign-loader processing steps of this task and publishes the segments they report.
 *
 * <p>Steps, in order: invoke {@code HadoopDetermineConfigInnerProcessing} to obtain the spec to index
 * with, pick a segment version from a task lock (optionally replaced by an explicit spec version),
 * invoke {@code HadoopIndexGeneratorInnerProcessing}, then publish the returned segment list.
 *
 * @param toolbox supplies the object mapper, config, segment pusher and task action client used here
 * @return success after the segments are published; failure when an explicit spec version does not
 *         compare lower than the lock version, or when the generator step returns {@code null}
 * @throws Exception propagated from any of the steps
 */
@SuppressWarnings("unchecked")
@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception {
  final ClassLoader loader = buildClassLoader(toolbox);

  // Evaluated against the spec as it is on entry, before the field is reassigned below.
  final boolean intervalsUnspecified =
      !spec.getDataSchema().getGranularitySpec().bucketIntervals().isPresent();

  spec = HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(
      spec,
      jsonMapper,
      new OverlordActionBasedUsedSegmentLister(toolbox)
  );

  final String[] determineConfigArgs = new String[] {
      toolbox.getObjectMapper().writeValueAsString(spec),
      toolbox.getConfig().getHadoopWorkingPath(),
      toolbox.getSegmentPusher().getPathForHadoop()
  };
  final String config = invokeForeignLoader(
      "io.druid.indexing.common.task.HadoopIndexTask$HadoopDetermineConfigInnerProcessing",
      determineConfigArgs,
      loader
  );
  final HadoopIngestionSpec indexerSchema =
      toolbox.getObjectMapper().readValue(config, HadoopIngestionSpec.class);

  // A lock held from before the run started is expected only when the interval was specified;
  // otherwise one is acquired here over the umbrella of the determined bucket intervals.
  String version;
  if (intervalsUnspecified) {
    final Interval umbrella = JodaUtils.umbrellaInterval(
        JodaUtils.condenseIntervals(
            indexerSchema.getDataSchema().getGranularitySpec().bucketIntervals().get()
        )
    );
    final TaskLock acquired = toolbox.getTaskActionClient().submit(new LockAcquireAction(umbrella));
    version = acquired.getVersion();
  } else {
    final Iterable<TaskLock> heldLocks = getTaskLocks(toolbox);
    version = Iterables.getOnlyElement(heldLocks).getVersion();
  }

  final String specVersion = indexerSchema.getTuningConfig().getVersion();
  if (indexerSchema.getTuningConfig().isUseExplicitVersion()) {
    // An explicit version is only accepted when it compares lower than the lock version.
    if (specVersion.compareTo(version) >= 0) {
      log.error("Spec version can not be greater than or equal to the lock version, Spec version: [%s] Lock version: [%s].", specVersion, version);
      return TaskStatus.failure(getId());
    }
    version = specVersion;
  }
  log.info("Setting version to: %s", version);

  final String[] generatorArgs = new String[] {
      toolbox.getObjectMapper().writeValueAsString(indexerSchema),
      version
  };
  final String segments = invokeForeignLoader(
      "io.druid.indexing.common.task.HadoopIndexTask$HadoopIndexGeneratorInnerProcessing",
      generatorArgs,
      loader
  );
  if (segments == null) {
    return TaskStatus.failure(getId());
  }

  final List<DataSegment> publishedSegments = toolbox.getObjectMapper().readValue(
      segments,
      new TypeReference<List<DataSegment>>() {
      }
  );
  toolbox.publishSegments(publishedSegments);
  return TaskStatus.success(getId());
}
Use of io.druid.indexing.common.TaskLock in project druid by druid-io.
Class IndexTask, method run.
/**
 * Determines shard specs, resolves the segment version and data schema, then generates and
 * publishes segments.
 *
 * <p>When the granularity spec carries no bucket intervals, a lock is acquired over the umbrella
 * of the intervals found in the shard specs and the schema is rebuilt with those intervals
 * condensed. Otherwise the version comes from the single task lock and the schema is used as is.
 *
 * @param toolbox passed to the firehose factory (when it is an {@code IngestSegmentFirehoseFactory})
 *                and to the helper methods called here
 * @return success when {@code generateAndPublishSegments} returns true, failure otherwise
 * @throws Exception propagated from any of the steps
 */
@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
  final boolean intervalsUnspecified =
      !ingestionSchema.getDataSchema().getGranularitySpec().bucketIntervals().isPresent();

  final FirehoseFactory delegateFirehoseFactory = ingestionSchema.getIOConfig().getFirehoseFactory();
  if (delegateFirehoseFactory instanceof IngestSegmentFirehoseFactory) {
    // pass toolbox to Firehose
    ((IngestSegmentFirehoseFactory) delegateFirehoseFactory).setTaskToolbox(toolbox);
  }

  // Wrap the delegate in a ReplayableFirehoseFactory unless caching is skipped or it already is one.
  final FirehoseFactory firehoseFactory;
  if (!ingestionSchema.getIOConfig().isSkipFirehoseCaching()
      && !(delegateFirehoseFactory instanceof ReplayableFirehoseFactory)) {
    firehoseFactory = new ReplayableFirehoseFactory(
        delegateFirehoseFactory,
        ingestionSchema.getTuningConfig().isReportParseExceptions(),
        null,
        null,
        smileMapper
    );
  } else {
    firehoseFactory = delegateFirehoseFactory;
  }

  final Map<Interval, List<ShardSpec>> shardSpecs = determineShardSpecs(toolbox, firehoseFactory);

  final String version;
  final DataSchema dataSchema;
  if (!intervalsUnspecified) {
    version = Iterables.getOnlyElement(getTaskLocks(toolbox)).getVersion();
    dataSchema = ingestionSchema.getDataSchema();
  } else {
    final Interval umbrella = JodaUtils.umbrellaInterval(shardSpecs.keySet());
    final TaskLock acquired = toolbox.getTaskActionClient().submit(new LockAcquireAction(umbrella));
    version = acquired.getVersion();
    dataSchema = ingestionSchema.getDataSchema().withGranularitySpec(
        ingestionSchema.getDataSchema()
                       .getGranularitySpec()
                       .withIntervals(JodaUtils.condenseIntervals(shardSpecs.keySet()))
    );
  }

  final boolean published = generateAndPublishSegments(toolbox, dataSchema, shardSpecs, version, firehoseFactory);
  return published ? TaskStatus.success(getId()) : TaskStatus.failure(getId());
}
Use of io.druid.indexing.common.TaskLock in project druid by druid-io.
Class KillTask, method run.
/**
 * Kills every unused segment listed for the lock's data source and interval.
 *
 * <p>All listed segments are first checked against the lock version; only after every one has
 * passed is each segment handed to the segment killer and then submitted in a
 * {@code SegmentNukeAction}.
 *
 * @param toolbox supplies the task action client and the data segment killer
 * @return success once every listed segment has been processed
 * @throws Exception if the lock does not match this task, a segment version compares higher than
 *                   the lock version, or any called action fails
 */
@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception {
  // Exactly one lock is expected; getOnlyElement throws when that is not the case.
  final TaskLock heldLock = Iterables.getOnlyElement(getTaskLocks(toolbox));

  final boolean sameDataSource = heldLock.getDataSource().equals(getDataSource());
  if (!sameDataSource) {
    throw new ISE("WTF?! Lock dataSource[%s] != task dataSource[%s]", heldLock.getDataSource(), getDataSource());
  }

  final boolean sameInterval = heldLock.getInterval().equals(getInterval());
  if (!sameInterval) {
    throw new ISE("WTF?! Lock interval[%s] != task interval[%s]", heldLock.getInterval(), getInterval());
  }

  // Ask for the unused segments covered by the lock.
  final List<DataSegment> candidates = toolbox.getTaskActionClient().submit(
      new SegmentListUnusedAction(heldLock.getDataSource(), heldLock.getInterval())
  );

  // First pass: reject the whole run if any candidate's version compares higher than the lock's.
  for (final DataSegment candidate : candidates) {
    final boolean newerThanLock = candidate.getVersion().compareTo(heldLock.getVersion()) > 0;
    if (newerThanLock) {
      throw new ISE(
          "WTF?! Unused segment[%s] has version[%s] > task version[%s]",
          candidate.getIdentifier(),
          candidate.getVersion(),
          heldLock.getVersion()
      );
    }
    log.info("OK to kill segment: %s", candidate.getIdentifier());
  }

  // Second pass: kill each segment, then submit the nuke action for it.
  for (DataSegment doomed : candidates) {
    toolbox.getDataSegmentKiller().kill(doomed);
    toolbox.getTaskActionClient().submit(new SegmentNukeAction(ImmutableSet.of(doomed)));
  }

  return TaskStatus.success(getId());
}
Use of io.druid.indexing.common.TaskLock in project druid by druid-io.
Class MergeTaskBase, method run.
/**
 * Fetches this task's segments, merges them, pushes the merged file and publishes the result.
 *
 * <p>Emits the metrics {@code merger/numMerged}, {@code merger/mergeTime},
 * {@code merger/uploadTime} and {@code merger/mergeSize} along the way. Any exception raised
 * inside the merge/push/publish sequence is reported through an alert and turned into a failure
 * status instead of being rethrown.
 *
 * @param toolbox supplies the emitter, the task work directory, segment fetching, the segment
 *                pusher and segment publishing
 * @return success when the merged segment has been published, failure when the sequence threw
 * @throws Exception from the steps performed before the guarded sequence starts
 */
@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception {
  final TaskLock heldLock = Iterables.getOnlyElement(getTaskLocks(toolbox));
  final ServiceEmitter emitter = toolbox.getEmitter();
  final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder();
  final DataSegment mergedSegment = computeMergedSegment(getDataSource(), heldLock.getVersion(), segments);
  final File taskDir = toolbox.getTaskWorkDir();

  try {
    final long mergeStartMillis = System.currentTimeMillis();

    log.info(
        "Starting merge of id[%s], segments: %s",
        getId(),
        Lists.transform(
            segments,
            new Function<DataSegment, String>() {
              @Override
              public String apply(DataSegment segment) {
                return segment.getIdentifier();
              }
            }
        )
    );

    // Pull the input segments down locally.
    final Map<DataSegment, File> fetchedSegments = toolbox.fetchSegments(segments);

    // Combine them into one output under the task directory.
    final File mergedFile = merge(toolbox, fetchedSegments, new File(taskDir, "merged"));

    emitter.emit(builder.build("merger/numMerged", segments.size()));

    // The clock is read separately for the metric and for the log line.
    final long mergeMillisForMetric = System.currentTimeMillis() - mergeStartMillis;
    emitter.emit(builder.build("merger/mergeTime", mergeMillisForMetric));

    final long mergeMillisForLog = System.currentTimeMillis() - mergeStartMillis;
    log.info("[%s] : Merged %d segments in %,d millis", mergedSegment.getDataSource(), segments.size(), mergeMillisForLog);

    final long pushStartMillis = System.currentTimeMillis();

    // Push the merged file.
    final DataSegment pushedSegment = toolbox.getSegmentPusher().push(mergedFile, mergedSegment);

    final long pushMillis = System.currentTimeMillis() - pushStartMillis;
    emitter.emit(builder.build("merger/uploadTime", pushMillis));
    emitter.emit(builder.build("merger/mergeSize", pushedSegment.getSize()));

    toolbox.publishSegments(ImmutableList.of(pushedSegment));
    return TaskStatus.success(getId());
  } catch (Exception e) {
    log.makeAlert(e, "Exception merging[%s]", mergedSegment.getDataSource())
       .addData("interval", mergedSegment.getInterval())
       .emit();
    return TaskStatus.failure(getId());
  }
}
Use of io.druid.indexing.common.TaskLock in project druid by druid-io.
Class MoveTask, method run.
/**
 * Moves every unused segment listed for the lock's data source and interval to
 * {@code targetLoadSpec}.
 *
 * <p>All listed segments are first checked against the lock version; only after every one has
 * passed is each segment handed to the segment mover, with the returned segment submitted in a
 * {@code SegmentMetadataUpdateAction}.
 *
 * @param toolbox supplies the task action client and the data segment mover
 * @return success once every listed segment has been processed
 * @throws Exception if the lock does not match this task, a segment version compares higher than
 *                   the lock version, or any called action fails
 */
@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception {
  // Exactly one lock is expected; getOnlyElement throws when that is not the case.
  final TaskLock heldLock = Iterables.getOnlyElement(getTaskLocks(toolbox));

  final boolean sameDataSource = heldLock.getDataSource().equals(getDataSource());
  if (!sameDataSource) {
    throw new ISE("WTF?! Lock dataSource[%s] != task dataSource[%s]", heldLock.getDataSource(), getDataSource());
  }

  final boolean sameInterval = heldLock.getInterval().equals(getInterval());
  if (!sameInterval) {
    throw new ISE("WTF?! Lock interval[%s] != task interval[%s]", heldLock.getInterval(), getInterval());
  }

  // Ask for the unused segments covered by the lock.
  final List<DataSegment> candidates = toolbox.getTaskActionClient().submit(
      new SegmentListUnusedAction(heldLock.getDataSource(), heldLock.getInterval())
  );

  // First pass: reject the whole run if any candidate's version compares higher than the lock's.
  for (final DataSegment candidate : candidates) {
    final boolean newerThanLock = candidate.getVersion().compareTo(heldLock.getVersion()) > 0;
    if (newerThanLock) {
      throw new ISE(
          "WTF?! Unused segment[%s] has version[%s] > task version[%s]",
          candidate.getIdentifier(),
          candidate.getVersion(),
          heldLock.getVersion()
      );
    }
    log.info("OK to move segment: %s", candidate.getIdentifier());
  }

  // Second pass: move each segment, then submit the metadata update for the moved copy.
  for (DataSegment source : candidates) {
    final DataSegment relocated = toolbox.getDataSegmentMover().move(source, targetLoadSpec);
    toolbox.getTaskActionClient().submit(new SegmentMetadataUpdateAction(ImmutableSet.of(relocated)));
  }

  return TaskStatus.success(getId());
}
Aggregations