Search in sources :

Example 11 with TaskLock

use of io.druid.indexing.common.TaskLock in project druid by druid-io.

the class HadoopIndexTask method run.

/**
 * Runs the Hadoop ingestion through two foreign-class-loader invocations: first
 * {@code HadoopDetermineConfigInnerProcessing}, whose result is deserialized into the
 * {@link HadoopIngestionSpec} used from then on, and then {@code HadoopIndexGeneratorInnerProcessing},
 * whose result (a serialized list of segments) is published through the toolbox.
 *
 * @param toolbox supplies the object mapper, task config, segment pusher and task action client
 * @return success once segments were produced and published; failure when the generator step
 *         returned {@code null}, or when an explicit spec version is not strictly lower than the
 *         lock version
 * @throws Exception propagated from any of the invoked steps
 */
@SuppressWarnings("unchecked")
@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception {
    final ClassLoader loader = buildClassLoader(toolbox);
    // Evaluated against the spec as it was before it gets reassigned on the next statement.
    final boolean determineIntervals = !spec.getDataSchema().getGranularitySpec().bucketIntervals().isPresent();
    spec = HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(
        spec,
        jsonMapper,
        new OverlordActionBasedUsedSegmentLister(toolbox)
    );

    // Step 1: determine the configuration inside the foreign class loader.
    final String[] determineConfigArgs = new String[] {
        toolbox.getObjectMapper().writeValueAsString(spec),
        toolbox.getConfig().getHadoopWorkingPath(),
        toolbox.getSegmentPusher().getPathForHadoop()
    };
    final String resolvedSpecJson = invokeForeignLoader(
        "io.druid.indexing.common.task.HadoopIndexTask$HadoopDetermineConfigInnerProcessing",
        determineConfigArgs,
        loader
    );
    final HadoopIngestionSpec indexerSchema = toolbox.getObjectMapper().readValue(resolvedSpecJson, HadoopIngestionSpec.class);

    // We should have a lock from before we started running only if interval was specified
    String version;
    if (!determineIntervals) {
        // Exactly one pre-existing lock is expected; getOnlyElement throws otherwise.
        version = Iterables.getOnlyElement(getTaskLocks(toolbox)).getVersion();
    } else {
        // Intervals were not given up front: lock the umbrella of the determined bucket intervals.
        final Interval umbrella = JodaUtils.umbrellaInterval(
            JodaUtils.condenseIntervals(indexerSchema.getDataSchema().getGranularitySpec().bucketIntervals().get())
        );
        final TaskLock acquired = toolbox.getTaskActionClient().submit(new LockAcquireAction(umbrella));
        version = acquired.getVersion();
    }

    final String specVersion = indexerSchema.getTuningConfig().getVersion();
    if (indexerSchema.getTuningConfig().isUseExplicitVersion()) {
        // An explicit version is only accepted when it sorts strictly before the lock version.
        if (specVersion.compareTo(version) >= 0) {
            log.error("Spec version can not be greater than or equal to the lock version, Spec version: [%s] Lock version: [%s].", specVersion, version);
            return TaskStatus.failure(getId());
        }
        version = specVersion;
    }
    log.info("Setting version to: %s", version);

    // Step 2: generate the index inside the foreign class loader.
    final String[] indexGeneratorArgs = new String[] {
        toolbox.getObjectMapper().writeValueAsString(indexerSchema),
        version
    };
    final String segments = invokeForeignLoader(
        "io.druid.indexing.common.task.HadoopIndexTask$HadoopIndexGeneratorInnerProcessing",
        indexGeneratorArgs,
        loader
    );
    if (segments == null) {
        return TaskStatus.failure(getId());
    }

    final List<DataSegment> publishedSegments = toolbox.getObjectMapper().readValue(
        segments,
        new TypeReference<List<DataSegment>>() {
        }
    );
    toolbox.publishSegments(publishedSegments);
    return TaskStatus.success(getId());
}
Also used : HadoopIngestionSpec(io.druid.indexer.HadoopIngestionSpec) OverlordActionBasedUsedSegmentLister(io.druid.indexing.hadoop.OverlordActionBasedUsedSegmentLister) DataSegment(io.druid.timeline.DataSegment) TaskLock(io.druid.indexing.common.TaskLock) LockAcquireAction(io.druid.indexing.common.actions.LockAcquireAction) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) Interval(org.joda.time.Interval)

Example 12 with TaskLock

use of io.druid.indexing.common.TaskLock in project druid by druid-io.

the class IndexTask method run.

/**
 * Determines shard specs, resolves the segment version and data schema to use, and then generates
 * and publishes the segments.
 *
 * @param toolbox supplies the task action client and is handed to the firehose factory when that
 *                factory is an {@link IngestSegmentFirehoseFactory}
 * @return success when {@code generateAndPublishSegments} returns {@code true}, failure otherwise
 * @throws Exception propagated from any of the invoked steps
 */
@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
    final boolean determineIntervals = !ingestionSchema.getDataSchema().getGranularitySpec().bucketIntervals().isPresent();

    final FirehoseFactory delegateFirehoseFactory = ingestionSchema.getIOConfig().getFirehoseFactory();
    if (delegateFirehoseFactory instanceof IngestSegmentFirehoseFactory) {
        // pass toolbox to Firehose
        ((IngestSegmentFirehoseFactory) delegateFirehoseFactory).setTaskToolbox(toolbox);
    }

    // The delegate is used as-is when caching is skipped or it is already replayable;
    // otherwise it is wrapped in a ReplayableFirehoseFactory.
    final boolean useDelegateDirectly = ingestionSchema.getIOConfig().isSkipFirehoseCaching()
                                        || delegateFirehoseFactory instanceof ReplayableFirehoseFactory;
    final FirehoseFactory firehoseFactory = useDelegateDirectly
        ? delegateFirehoseFactory
        : new ReplayableFirehoseFactory(
            delegateFirehoseFactory,
            ingestionSchema.getTuningConfig().isReportParseExceptions(),
            null,
            null,
            smileMapper
        );

    final Map<Interval, List<ShardSpec>> shardSpecs = determineShardSpecs(toolbox, firehoseFactory);

    final String version;
    final DataSchema dataSchema;
    if (!determineIntervals) {
        // Intervals were specified: exactly one existing lock is expected (getOnlyElement throws otherwise).
        version = Iterables.getOnlyElement(getTaskLocks(toolbox)).getVersion();
        dataSchema = ingestionSchema.getDataSchema();
    } else {
        // No intervals specified: lock the umbrella of the determined intervals and
        // write the condensed intervals back into the granularity spec.
        final Interval umbrella = JodaUtils.umbrellaInterval(shardSpecs.keySet());
        final TaskLock acquired = toolbox.getTaskActionClient().submit(new LockAcquireAction(umbrella));
        version = acquired.getVersion();
        dataSchema = ingestionSchema.getDataSchema().withGranularitySpec(
            ingestionSchema.getDataSchema()
                           .getGranularitySpec()
                           .withIntervals(JodaUtils.condenseIntervals(shardSpecs.keySet()))
        );
    }

    return generateAndPublishSegments(toolbox, dataSchema, shardSpecs, version, firehoseFactory)
        ? TaskStatus.success(getId())
        : TaskStatus.failure(getId());
}
Also used : IngestSegmentFirehoseFactory(io.druid.indexing.firehose.IngestSegmentFirehoseFactory) DataSchema(io.druid.segment.indexing.DataSchema) TaskLock(io.druid.indexing.common.TaskLock) IngestSegmentFirehoseFactory(io.druid.indexing.firehose.IngestSegmentFirehoseFactory) ReplayableFirehoseFactory(io.druid.segment.realtime.firehose.ReplayableFirehoseFactory) FirehoseFactory(io.druid.data.input.FirehoseFactory) LockAcquireAction(io.druid.indexing.common.actions.LockAcquireAction) ReplayableFirehoseFactory(io.druid.segment.realtime.firehose.ReplayableFirehoseFactory) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Interval(org.joda.time.Interval)

Example 13 with TaskLock

use of io.druid.indexing.common.TaskLock in project druid by druid-io.

the class KillTask method run.

/**
 * Kills every unused segment covered by this task's lock: each segment is handed to the data
 * segment killer and then a {@link SegmentNukeAction} is submitted for it.
 *
 * @param toolbox supplies the task action client and the data segment killer
 * @return success once every listed unused segment has been processed
 * @throws Exception if the lock does not match this task's data source or interval, if an unused
 *                   segment has a version greater than the lock version, or if any invoked step fails
 */
@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception {
    // Confirm we have a lock (will throw if there isn't exactly one element)
    final TaskLock myLock = Iterables.getOnlyElement(getTaskLocks(toolbox));

    // Sanity checks: the lock must describe exactly what this task operates on.
    final boolean sameDataSource = myLock.getDataSource().equals(getDataSource());
    if (!sameDataSource) {
        throw new ISE("WTF?! Lock dataSource[%s] != task dataSource[%s]", myLock.getDataSource(), getDataSource());
    }
    final boolean sameInterval = myLock.getInterval().equals(getInterval());
    if (!sameInterval) {
        throw new ISE("WTF?! Lock interval[%s] != task interval[%s]", myLock.getInterval(), getInterval());
    }

    // List unused segments
    final SegmentListUnusedAction listUnused = new SegmentListUnusedAction(myLock.getDataSource(), myLock.getInterval());
    final List<DataSegment> unusedSegments = toolbox.getTaskActionClient().submit(listUnused);

    // Verify none of these segments have versions > lock version.
    // This pass completes for all segments before anything is killed.
    for (final DataSegment candidate : unusedSegments) {
        final boolean newerThanLock = candidate.getVersion().compareTo(myLock.getVersion()) > 0;
        if (newerThanLock) {
            throw new ISE("WTF?! Unused segment[%s] has version[%s] > task version[%s]", candidate.getIdentifier(), candidate.getVersion(), myLock.getVersion());
        }
        log.info("OK to kill segment: %s", candidate.getIdentifier());
    }

    // Kill segments
    for (final DataSegment doomed : unusedSegments) {
        toolbox.getDataSegmentKiller().kill(doomed);
        toolbox.getTaskActionClient().submit(new SegmentNukeAction(ImmutableSet.of(doomed)));
    }

    return TaskStatus.success(getId());
}
Also used : TaskLock(io.druid.indexing.common.TaskLock) SegmentNukeAction(io.druid.indexing.common.actions.SegmentNukeAction) ISE(io.druid.java.util.common.ISE) SegmentListUnusedAction(io.druid.indexing.common.actions.SegmentListUnusedAction) DataSegment(io.druid.timeline.DataSegment)

Example 14 with TaskLock

use of io.druid.indexing.common.TaskLock in project druid by druid-io.

the class MergeTaskBase method run.

/**
 * Fetches {@code segments}, merges them into one file, pushes that file as the merged segment and
 * publishes the result, emitting {@code merger/*} metrics along the way.
 *
 * @param toolbox supplies the emitter, task work directory, segment fetching, pusher and publishing
 * @return success after the uploaded segment has been published; failure if any exception is
 *         thrown inside the merge/upload/publish sequence (an alert is emitted in that case)
 * @throws Exception if obtaining the lock or computing the merged segment descriptor fails, since
 *                   those steps run outside the try block
 */
@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception {
    // Exactly one lock is expected; getOnlyElement throws otherwise.
    final TaskLock myLock = Iterables.getOnlyElement(getTaskLocks(toolbox));
    final ServiceEmitter emitter = toolbox.getEmitter();
    final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder();
    final DataSegment mergedSegment = computeMergedSegment(getDataSource(), myLock.getVersion(), segments);
    final File taskDir = toolbox.getTaskWorkDir();

    try {
        final long startTime = System.currentTimeMillis();

        // Maps each segment to its identifier for the log line below.
        final Function<DataSegment, String> toIdentifier = new Function<DataSegment, String>() {

            @Override
            public String apply(DataSegment input) {
                return input.getIdentifier();
            }
        };
        log.info("Starting merge of id[%s], segments: %s", getId(), Lists.transform(segments, toIdentifier));

        // download segments to merge
        final Map<DataSegment, File> fetchedSegments = toolbox.fetchSegments(segments);

        // merge files together
        final File mergedDir = new File(taskDir, "merged");
        final File fileToUpload = merge(toolbox, fetchedSegments, mergedDir);

        emitter.emit(builder.build("merger/numMerged", segments.size()));
        emitter.emit(builder.build("merger/mergeTime", System.currentTimeMillis() - startTime));
        log.info(
            "[%s] : Merged %d segments in %,d millis",
            mergedSegment.getDataSource(),
            segments.size(),
            System.currentTimeMillis() - startTime
        );

        // Upload file
        final long uploadStart = System.currentTimeMillis();
        final DataSegment uploadedSegment = toolbox.getSegmentPusher().push(fileToUpload, mergedSegment);

        emitter.emit(builder.build("merger/uploadTime", System.currentTimeMillis() - uploadStart));
        emitter.emit(builder.build("merger/mergeSize", uploadedSegment.getSize()));

        toolbox.publishSegments(ImmutableList.of(uploadedSegment));
        return TaskStatus.success(getId());
    } catch (Exception e) {
        // Any failure in the sequence above is reported as an alert and turned into a failed status.
        log.makeAlert(e, "Exception merging[%s]", mergedSegment.getDataSource())
           .addData("interval", mergedSegment.getInterval())
           .emit();
        return TaskStatus.failure(getId());
    }
}
Also used : ServiceEmitter(com.metamx.emitter.service.ServiceEmitter) Function(com.google.common.base.Function) TaskLock(io.druid.indexing.common.TaskLock) ServiceMetricEvent(com.metamx.emitter.service.ServiceMetricEvent) DataSegment(io.druid.timeline.DataSegment) File(java.io.File)

Example 15 with TaskLock

use of io.druid.indexing.common.TaskLock in project druid by druid-io.

the class MoveTask method run.

/**
 * Moves every unused segment covered by this task's lock to {@code targetLoadSpec} and submits a
 * {@link SegmentMetadataUpdateAction} for each moved segment.
 *
 * @param toolbox supplies the task action client and the data segment mover
 * @return success once every listed unused segment has been processed
 * @throws Exception if the lock does not match this task's data source or interval, if an unused
 *                   segment has a version greater than the lock version, or if any invoked step fails
 */
@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception {
    // Confirm we have a lock (will throw if there isn't exactly one element)
    final TaskLock myLock = Iterables.getOnlyElement(getTaskLocks(toolbox));

    // The lock has to cover exactly this task's data source and interval.
    if (!myLock.getDataSource().equals(getDataSource())) {
        throw new ISE(
            "WTF?! Lock dataSource[%s] != task dataSource[%s]",
            myLock.getDataSource(),
            getDataSource()
        );
    }
    if (!myLock.getInterval().equals(getInterval())) {
        throw new ISE(
            "WTF?! Lock interval[%s] != task interval[%s]",
            myLock.getInterval(),
            getInterval()
        );
    }

    // List unused segments
    final List<DataSegment> unusedSegments = toolbox
        .getTaskActionClient()
        .submit(new SegmentListUnusedAction(myLock.getDataSource(), myLock.getInterval()));

    // Verify none of these segments have versions > lock version.
    // The whole list is checked before any segment is moved.
    for (final DataSegment candidate : unusedSegments) {
        if (candidate.getVersion().compareTo(myLock.getVersion()) > 0) {
            throw new ISE(
                "WTF?! Unused segment[%s] has version[%s] > task version[%s]",
                candidate.getIdentifier(),
                candidate.getVersion(),
                myLock.getVersion()
            );
        }
        log.info("OK to move segment: %s", candidate.getIdentifier());
    }

    // Move segments
    for (final DataSegment toMove : unusedSegments) {
        final DataSegment relocated = toolbox.getDataSegmentMover().move(toMove, targetLoadSpec);
        toolbox.getTaskActionClient().submit(new SegmentMetadataUpdateAction(ImmutableSet.of(relocated)));
    }

    return TaskStatus.success(getId());
}
Also used : SegmentMetadataUpdateAction(io.druid.indexing.common.actions.SegmentMetadataUpdateAction) TaskLock(io.druid.indexing.common.TaskLock) ISE(io.druid.java.util.common.ISE) SegmentListUnusedAction(io.druid.indexing.common.actions.SegmentListUnusedAction) DataSegment(io.druid.timeline.DataSegment)

Aggregations

TaskLock (io.druid.indexing.common.TaskLock): 24, DataSegment (io.druid.timeline.DataSegment): 13, Interval (org.joda.time.Interval): 13, Task (io.druid.indexing.common.task.Task): 8, Test (org.junit.Test): 7, LockListAction (io.druid.indexing.common.actions.LockListAction): 6, TaskToolbox (io.druid.indexing.common.TaskToolbox): 5, LockAcquireAction (io.druid.indexing.common.actions.LockAcquireAction): 5, SegmentInsertAction (io.druid.indexing.common.actions.SegmentInsertAction): 5, ISE (io.druid.java.util.common.ISE): 5, SegmentIdentifier (io.druid.segment.realtime.appenderator.SegmentIdentifier): 5, SegmentListUnusedAction (io.druid.indexing.common.actions.SegmentListUnusedAction): 4, NoopTask (io.druid.indexing.common.task.NoopTask): 4, NumberedShardSpec (io.druid.timeline.partition.NumberedShardSpec): 4, File (java.io.File): 4, Predicate (com.google.common.base.Predicate): 3, TaskStatus (io.druid.indexing.common.TaskStatus): 3, SegmentMetadataUpdateAction (io.druid.indexing.common.actions.SegmentMetadataUpdateAction): 3, AbstractFixedIntervalTask (io.druid.indexing.common.task.AbstractFixedIntervalTask): 3, IndexTask (io.druid.indexing.common.task.IndexTask): 3