Search in sources:

Example 1 with ClientBasedTaskInfoProvider

use of org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider in project druid by druid-io.

In the class PartialSegmentMergeTask, method runTask:

/**
 * Fetches the partial segment files named by {@code ioConfig}, merges and pushes them, and reports the
 * pushed segments to the supervisor task identified by {@code supervisorTaskId}.
 *
 * <p>The version used for each interval is taken from the locks listed on behalf of the supervisor task.
 *
 * @param toolbox toolbox providing the task action client, the supervisor task client factory and the
 *                persist directory
 * @return a success status carrying this task's id
 * @throws ISE       if a listed lock is revoked, or if two locks cover the same interval
 * @throws Exception propagated from fetching, merging, pushing or reporting
 */
@Override
public TaskStatus runTask(TaskToolbox toolbox) throws Exception {
    // Bucket every partition location first by its interval, then by its bucket id.
    final Map<Interval, Int2ObjectMap<List<PartitionLocation>>> intervalToBuckets = new HashMap<>();
    for (PartitionLocation location : ioConfig.getPartitionLocations()) {
        final Int2ObjectMap<List<PartitionLocation>> bucketsOfInterval =
            intervalToBuckets.computeIfAbsent(location.getInterval(), interval -> new Int2ObjectOpenHashMap<>());
        bucketsOfInterval
            .computeIfAbsent(location.getBucketId(), bucketId -> new ArrayList<>())
            .add(location);
    }

    // Locks are listed through a SurrogateAction addressed to the supervisor task.
    final List<TaskLock> locks = toolbox
        .getTaskActionClient()
        .submit(new SurrogateAction<>(supervisorTaskId, new LockListAction()));

    // Exactly one version is expected per interval; anything else is an illegal state.
    final Map<Interval, String> intervalToVersion = Maps.newHashMapWithExpectedSize(locks.size());
    for (TaskLock lock : locks) {
        if (lock.isRevoked()) {
            throw new ISE("Lock[%s] is revoked", lock);
        }
        final String previousVersion = intervalToVersion.put(lock.getInterval(), lock.getVersion());
        if (previousVersion != null) {
            throw new ISE(
                "Unexpected state: Two versions([%s], [%s]) for the same interval[%s]",
                lock.getVersion(),
                previousVersion,
                lock.getInterval()
            );
        }
    }

    // Time the fetch phase so that its duration can be logged.
    final Stopwatch fetchStopwatch = Stopwatch.createStarted();
    final Map<Interval, Int2ObjectMap<List<File>>> intervalToUnzippedFiles =
        fetchSegmentFiles(toolbox, intervalToBuckets);
    final long fetchSeconds = fetchStopwatch.elapsed(TimeUnit.SECONDS);
    fetchStopwatch.stop();
    LOG.info("Fetch took [%s] seconds", fetchSeconds);

    final ParallelIndexSupervisorTaskClient taskClient = toolbox.getSupervisorTaskClientFactory().build(
        new ClientBasedTaskInfoProvider(toolbox.getIndexingServiceClient()),
        getId(),
        // always use a single http thread
        1,
        getTuningConfig().getChatHandlerTimeout(),
        getTuningConfig().getChatHandlerNumRetries()
    );

    // Start from an empty persist directory: remove whatever is there, then recreate it.
    final File persistDir = toolbox.getPersistDir();
    org.apache.commons.io.FileUtils.deleteQuietly(persistDir);
    FileUtils.mkdirp(persistDir);

    final Set<DataSegment> pushedSegments = mergeAndPushSegments(
        toolbox,
        getDataSchema(),
        getTuningConfig(),
        persistDir,
        intervalToVersion,
        intervalToUnzippedFiles
    );

    // The report carries an empty set alongside the pushed segments and an empty map.
    taskClient.report(
        supervisorTaskId,
        new PushedSegmentsReport(getId(), Collections.emptySet(), pushedSegments, ImmutableMap.of())
    );
    return TaskStatus.success(getId());
}
Also used : TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) LockListAction(org.apache.druid.indexing.common.actions.LockListAction) Logger(org.apache.druid.java.util.common.logger.Logger) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) Arrays(java.util.Arrays) Stopwatch(com.google.common.base.Stopwatch) HashMap(java.util.HashMap) TaskResource(org.apache.druid.indexing.common.task.TaskResource) TaskStatus(org.apache.druid.indexer.TaskStatus) Pair(org.apache.druid.java.util.common.Pair) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) BaseProgressIndicator(org.apache.druid.segment.BaseProgressIndicator) Interval(org.joda.time.Interval) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) Map(java.util.Map) TaskLock(org.apache.druid.indexing.common.TaskLock) RetryUtils(org.apache.druid.java.util.common.RetryUtils) IndexMergerV9(org.apache.druid.segment.IndexMergerV9) FileUtils(org.apache.druid.java.util.common.FileUtils) Nullable(javax.annotation.Nullable) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) ImmutableMap(com.google.common.collect.ImmutableMap) ClientBasedTaskInfoProvider(org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider) IndexMerger(org.apache.druid.segment.IndexMerger) Closer(org.apache.druid.java.util.common.io.Closer) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) QueryableIndex(org.apache.druid.segment.QueryableIndex) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) Collectors(java.util.stream.Collectors) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap) 
SurrogateAction(org.apache.druid.indexing.common.actions.SurrogateAction) DataSegment(org.apache.druid.timeline.DataSegment) Entry(java.util.Map.Entry) Preconditions(com.google.common.base.Preconditions) IndexIO(org.apache.druid.segment.IndexIO) DataSchema(org.apache.druid.segment.indexing.DataSchema) Collections(java.util.Collections) LockListAction(org.apache.druid.indexing.common.actions.LockListAction) HashMap(java.util.HashMap) Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) ArrayList(java.util.ArrayList) Stopwatch(com.google.common.base.Stopwatch) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap) DataSegment(org.apache.druid.timeline.DataSegment) TaskLock(org.apache.druid.indexing.common.TaskLock) ISE(org.apache.druid.java.util.common.ISE) ClientBasedTaskInfoProvider(org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider) File(java.io.File) Interval(org.joda.time.Interval)

Example 2 with ClientBasedTaskInfoProvider

use of org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider in project druid by druid-io.

In the class PartialDimensionCardinalityTask, method sendReport:

/**
 * Sends the given cardinality report to the supervisor task identified by {@code supervisorTaskId}.
 *
 * <p>A fresh supervisor task client is built for the call, using the chat handler timeout and retry
 * count from the tuning config of {@code ingestionSchema}.
 *
 * @param toolbox toolbox supplying the supervisor task client factory and the indexing service client
 * @param report  report to deliver
 */
private void sendReport(TaskToolbox toolbox, DimensionCardinalityReport report) {
    final ClientBasedTaskInfoProvider taskInfoProvider =
        new ClientBasedTaskInfoProvider(toolbox.getIndexingServiceClient());
    // always use a single http thread
    final int numHttpThreads = 1;
    final ParallelIndexSupervisorTaskClient taskClient = toolbox.getSupervisorTaskClientFactory().build(
        taskInfoProvider,
        getId(),
        numHttpThreads,
        ingestionSchema.getTuningConfig().getChatHandlerTimeout(),
        ingestionSchema.getTuningConfig().getChatHandlerNumRetries()
    );
    taskClient.report(supervisorTaskId, report);
}
Also used : ClientBasedTaskInfoProvider(org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider)

Example 3 with ClientBasedTaskInfoProvider

use of org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider in project druid by druid-io.

In the class PartialDimensionDistributionTask, method sendReport:

/**
 * Sends the given distribution report to the supervisor task identified by {@code supervisorTaskId}.
 *
 * <p>A fresh supervisor task client is built for the call, using the chat handler timeout and retry
 * count from the tuning config of {@code ingestionSchema}.
 *
 * @param toolbox toolbox supplying the supervisor task client factory and the indexing service client
 * @param report  report to deliver
 */
private void sendReport(TaskToolbox toolbox, DimensionDistributionReport report) {
    final ClientBasedTaskInfoProvider taskInfoProvider =
        new ClientBasedTaskInfoProvider(toolbox.getIndexingServiceClient());
    // always use a single http thread
    final int numHttpThreads = 1;
    final ParallelIndexSupervisorTaskClient taskClient = toolbox.getSupervisorTaskClientFactory().build(
        taskInfoProvider,
        getId(),
        numHttpThreads,
        ingestionSchema.getTuningConfig().getChatHandlerTimeout(),
        ingestionSchema.getTuningConfig().getChatHandlerNumRetries()
    );
    taskClient.report(supervisorTaskId, report);
}
Also used : ClientBasedTaskInfoProvider(org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider)

Example 4 with ClientBasedTaskInfoProvider

use of org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider in project druid by druid-io.

In the class PartialSegmentGenerateTask, method runTask:

/**
 * Generates segments from the configured input source and reports the generated partitions to the
 * supervisor task identified by {@code supervisorTaskId}.
 *
 * @param toolbox toolbox supplying the supervisor task client factory, the indexing service client and
 *                the indexing temporary directory
 * @return a success status carrying this task's id
 * @throws Exception propagated from segment generation or reporting
 */
@Override
public final TaskStatus runTask(TaskToolbox toolbox) throws Exception {
    final InputSource inputSource = ingestionSchema
        .getIOConfig()
        .getNonNullInputSource(ingestionSchema.getDataSchema().getParser());

    final ClientBasedTaskInfoProvider taskInfoProvider =
        new ClientBasedTaskInfoProvider(toolbox.getIndexingServiceClient());
    // always use a single http thread
    final int numHttpThreads = 1;
    final ParallelIndexSupervisorTaskClient taskClient = toolbox.getSupervisorTaskClientFactory().build(
        taskInfoProvider,
        getId(),
        numHttpThreads,
        ingestionSchema.getTuningConfig().getChatHandlerTimeout(),
        ingestionSchema.getTuningConfig().getChatHandlerNumRetries()
    );

    final List<DataSegment> generatedSegments =
        generateSegments(toolbox, taskClient, inputSource, toolbox.getIndexingTmpDir());
    taskClient.report(supervisorTaskId, createGeneratedPartitionsReport(toolbox, generatedSegments));
    return TaskStatus.success(getId());
}
Also used : InputSource(org.apache.druid.data.input.InputSource) ClientBasedTaskInfoProvider(org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider) DataSegment(org.apache.druid.timeline.DataSegment)

Example 5 with ClientBasedTaskInfoProvider

use of org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider in project druid by druid-io.

In the class SinglePhaseSubTask, method runTask:

/**
 * Generates and pushes segments from the configured input source, then reports the pushed segments,
 * together with the existing segments they fully overshadow, to the supervisor task identified by
 * {@code supervisorTaskId}.
 *
 * <p>This task is registered with the chat handler provider for the duration of the run and is always
 * unregistered on exit. Any exception is caught, logged, stored in {@code errorMsg} as a stack trace,
 * and converted into a failure status; the task completion reports are written in both the success and
 * the failure path.
 *
 * @param toolbox toolbox supplying the services used by this task
 * @return a success status on completion, otherwise a failure status carrying the stack trace
 */
@Override
public TaskStatus runTask(final TaskToolbox toolbox) {
    try {
        if (missingIntervalsInOverwriteMode) {
            LOG.warn(
                "Intervals are missing in granularitySpec while this task is potentially overwriting existing segments. "
                + "Forced to use timeChunk lock."
            );
        }
        this.authorizerMapper = toolbox.getAuthorizerMapper();
        toolbox.getChatHandlerProvider().register(getId(), this, false);

        rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
        parseExceptionHandler = new ParseExceptionHandler(
            rowIngestionMeters,
            ingestionSchema.getTuningConfig().isLogParseExceptions(),
            ingestionSchema.getTuningConfig().getMaxParseExceptions(),
            ingestionSchema.getTuningConfig().getMaxSavedParseExceptions()
        );

        final InputSource inputSource = ingestionSchema
            .getIOConfig()
            .getNonNullInputSource(ingestionSchema.getDataSchema().getParser());

        final ParallelIndexSupervisorTaskClient taskClient = toolbox.getSupervisorTaskClientFactory().build(
            new ClientBasedTaskInfoProvider(toolbox.getIndexingServiceClient()),
            getId(),
            // always use a single http thread
            1,
            ingestionSchema.getTuningConfig().getChatHandlerTimeout(),
            ingestionSchema.getTuningConfig().getChatHandlerNumRetries()
        );

        ingestionState = IngestionState.BUILD_SEGMENTS;
        final Set<DataSegment> pushedSegments =
            generateAndPushSegments(toolbox, taskClient, inputSource, toolbox.getIndexingTmpDir());

        // Combine the locked existing segments with the new ones, then ask the timeline which
        // segments are fully overshadowed.
        final Set<DataSegment> lockedAndPushed = new HashSet<>(getTaskLockHelper().getLockedExistingSegments());
        lockedAndPushed.addAll(pushedSegments);
        final VersionedIntervalTimeline<String, DataSegment> timeline =
            VersionedIntervalTimeline.forSegments(lockedAndPushed);
        final Set<DataSegment> oldSegments = FluentIterable
            .from(timeline.findFullyOvershadowed())
            .transformAndConcat(TimelineObjectHolder::getObject)
            .transform(PartitionChunk::getObject)
            .toSet();

        // The same report map is sent to the supervisor and written to the report file.
        Map<String, TaskReport> completionReports = getTaskCompletionReports();
        taskClient.report(
            supervisorTaskId,
            new PushedSegmentsReport(getId(), oldSegments, pushedSegments, completionReports)
        );
        toolbox.getTaskReportFileWriter().write(getId(), completionReports);

        return TaskStatus.success(getId());
    } catch (Exception ex) {
        LOG.error(ex, "Encountered exception in parallel sub task.");
        // errorMsg is assigned before the completion reports are built for the failure path.
        errorMsg = Throwables.getStackTraceAsString(ex);
        toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
        return TaskStatus.failure(getId(), errorMsg);
    } finally {
        toolbox.getChatHandlerProvider().unregister(getId());
    }
}
Also used : InputSource(org.apache.druid.data.input.InputSource) TaskReport(org.apache.druid.indexing.common.TaskReport) IngestionStatsAndErrorsTaskReport(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport) DataSegment(org.apache.druid.timeline.DataSegment) TimeoutException(java.util.concurrent.TimeoutException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) ClientBasedTaskInfoProvider(org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider) HashSet(java.util.HashSet)

Aggregations

ClientBasedTaskInfoProvider (org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider)5 IOException (java.io.IOException)2 HashSet (java.util.HashSet)2 InputSource (org.apache.druid.data.input.InputSource)2 DataSegment (org.apache.druid.timeline.DataSegment)2 JsonProperty (com.fasterxml.jackson.annotation.JsonProperty)1 Preconditions (com.google.common.base.Preconditions)1 Stopwatch (com.google.common.base.Stopwatch)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 Maps (com.google.common.collect.Maps)1 Int2ObjectMap (it.unimi.dsi.fastutil.ints.Int2ObjectMap)1 Int2ObjectOpenHashMap (it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap)1 File (java.io.File)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Entry (java.util.Map.Entry)1