Use of org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider in project druid by druid-io.
Class PartialSegmentMergeTask, method runTask.
/**
 * Runs this merge task end to end: indexes the configured partition locations by interval and
 * bucket id, resolves exactly one lock version per interval, fetches the segment files, merges
 * and pushes them, and finally reports the pushed segments under {@code supervisorTaskId}.
 *
 * @param toolbox toolbox providing the task action client, the supervisor task client factory
 *                and the persist directory used below
 * @return a success status for this task's id
 * @throws Exception if any step fails; in particular an {@code ISE} is thrown when a listed lock
 *                   is revoked or when two listed locks share the same interval
 */
@Override
public TaskStatus runTask(TaskToolbox toolbox) throws Exception {
  // Index the partition locations as interval -> bucketId -> locations.
  final Map<Interval, Int2ObjectMap<List<PartitionLocation>>> intervalToBuckets = new HashMap<>();
  for (PartitionLocation partitionLocation : ioConfig.getPartitionLocations()) {
    intervalToBuckets
        .computeIfAbsent(partitionLocation.getInterval(), ignoredInterval -> new Int2ObjectOpenHashMap<>())
        .computeIfAbsent(partitionLocation.getBucketId(), ignoredBucketId -> new ArrayList<>())
        .add(partitionLocation);
  }

  // List the locks through a SurrogateAction issued for supervisorTaskId.
  final List<TaskLock> listedLocks = toolbox.getTaskActionClient().submit(
      new SurrogateAction<>(supervisorTaskId, new LockListAction())
  );

  // Each interval must map to exactly one version, and no lock may be revoked.
  final Map<Interval, String> intervalToVersion = Maps.newHashMapWithExpectedSize(listedLocks.size());
  for (TaskLock taskLock : listedLocks) {
    if (taskLock.isRevoked()) {
      throw new ISE("Lock[%s] is revoked", taskLock);
    }
    final String previousVersion = intervalToVersion.put(taskLock.getInterval(), taskLock.getVersion());
    if (previousVersion != null) {
      throw new ISE(
          "Unexpected state: Two versions([%s], [%s]) for the same interval[%s]",
          taskLock.getVersion(),
          previousVersion,
          taskLock.getInterval()
      );
    }
  }

  // Time the fetch phase and log its duration in whole seconds.
  final Stopwatch fetchTimer = Stopwatch.createStarted();
  final Map<Interval, Int2ObjectMap<List<File>>> intervalToUnzippedFiles =
      fetchSegmentFiles(toolbox, intervalToBuckets);
  final long fetchSeconds = fetchTimer.elapsed(TimeUnit.SECONDS);
  fetchTimer.stop();
  LOG.info("Fetch took [%s] seconds", fetchSeconds);

  final ParallelIndexSupervisorTaskClient supervisorClient = toolbox.getSupervisorTaskClientFactory().build(
      new ClientBasedTaskInfoProvider(toolbox.getIndexingServiceClient()),
      getId(),
      // always use a single http thread
      1,
      getTuningConfig().getChatHandlerTimeout(),
      getTuningConfig().getChatHandlerNumRetries()
  );

  // Start from an empty persist directory: remove whatever is there, then recreate it.
  final File persistDir = toolbox.getPersistDir();
  org.apache.commons.io.FileUtils.deleteQuietly(persistDir);
  FileUtils.mkdirp(persistDir);

  final Set<DataSegment> pushedSegments = mergeAndPushSegments(
      toolbox,
      getDataSchema(),
      getTuningConfig(),
      persistDir,
      intervalToVersion,
      intervalToUnzippedFiles
  );

  // The report carries an empty first segment set and an empty report map alongside the pushed segments.
  supervisorClient.report(
      supervisorTaskId,
      new PushedSegmentsReport(getId(), Collections.emptySet(), pushedSegments, ImmutableMap.of())
  );
  return TaskStatus.success(getId());
}
Use of org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider in project druid by druid-io.
Class PartialDimensionCardinalityTask, method sendReport.
/**
 * Builds a supervisor task client from the toolbox and uses it to submit the given
 * dimension cardinality report under {@code supervisorTaskId}.
 *
 * @param toolbox toolbox supplying the supervisor task client factory and the indexing service client
 * @param report  the report to submit
 */
private void sendReport(TaskToolbox toolbox, DimensionCardinalityReport report) {
  final ParallelIndexSupervisorTaskClient supervisorClient = toolbox.getSupervisorTaskClientFactory().build(
      new ClientBasedTaskInfoProvider(toolbox.getIndexingServiceClient()),
      getId(),
      // always use a single http thread
      1,
      ingestionSchema.getTuningConfig().getChatHandlerTimeout(),
      ingestionSchema.getTuningConfig().getChatHandlerNumRetries()
  );
  supervisorClient.report(supervisorTaskId, report);
}
Use of org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider in project druid by druid-io.
Class PartialDimensionDistributionTask, method sendReport.
/**
 * Builds a supervisor task client from the toolbox and uses it to submit the given
 * dimension distribution report under {@code supervisorTaskId}.
 *
 * @param toolbox toolbox supplying the supervisor task client factory and the indexing service client
 * @param report  the report to submit
 */
private void sendReport(TaskToolbox toolbox, DimensionDistributionReport report) {
  final ParallelIndexSupervisorTaskClient supervisorClient = toolbox.getSupervisorTaskClientFactory().build(
      new ClientBasedTaskInfoProvider(toolbox.getIndexingServiceClient()),
      getId(),
      // always use a single http thread
      1,
      ingestionSchema.getTuningConfig().getChatHandlerTimeout(),
      ingestionSchema.getTuningConfig().getChatHandlerNumRetries()
  );
  supervisorClient.report(supervisorTaskId, report);
}
Use of org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider in project druid by druid-io.
Class PartialSegmentGenerateTask, method runTask.
/**
 * Resolves the input source from the ingestion schema, builds a supervisor task client,
 * generates segments via {@code generateSegments} (handing it the toolbox's indexing temp
 * directory), and submits the report produced by {@code createGeneratedPartitionsReport}
 * under {@code supervisorTaskId}.
 *
 * @param toolbox toolbox supplying the client factory, indexing service client and temp directory
 * @return a success status for this task's id
 * @throws Exception if resolving the input, generating segments or reporting fails
 */
@Override
public final TaskStatus runTask(TaskToolbox toolbox) throws Exception {
  final InputSource source = ingestionSchema.getIOConfig().getNonNullInputSource(
      ingestionSchema.getDataSchema().getParser()
  );

  final ParallelIndexSupervisorTaskClient supervisorClient = toolbox.getSupervisorTaskClientFactory().build(
      new ClientBasedTaskInfoProvider(toolbox.getIndexingServiceClient()),
      getId(),
      // always use a single http thread
      1,
      ingestionSchema.getTuningConfig().getChatHandlerTimeout(),
      ingestionSchema.getTuningConfig().getChatHandlerNumRetries()
  );

  final List<DataSegment> generatedSegments = generateSegments(
      toolbox,
      supervisorClient,
      source,
      toolbox.getIndexingTmpDir()
  );
  supervisorClient.report(supervisorTaskId, createGeneratedPartitionsReport(toolbox, generatedSegments));

  return TaskStatus.success(getId());
}
Use of org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider in project druid by druid-io.
Class SinglePhaseSubTask, method runTask.
/**
 * Runs this sub task: registers it as a chat handler, sets up row-ingestion metering and
 * parse-exception handling, generates and pushes segments, determines which of the locked
 * existing segments are fully overshadowed by the pushed ones, reports the result under
 * {@code supervisorTaskId}, and writes the task completion reports.
 *
 * <p>Any exception is caught, logged, recorded in {@code errorMsg} as a stack trace string and
 * turned into a failure status; the chat handler is unregistered in every case.
 *
 * @param toolbox toolbox supplying the chat handler provider, factories, clients and report writer
 * @return a success status, or a failure status carrying the stack trace of the caught exception
 */
@Override
public TaskStatus runTask(final TaskToolbox toolbox) {
  try {
    if (missingIntervalsInOverwriteMode) {
      LOG.warn("Intervals are missing in granularitySpec while this task is potentially overwriting existing segments. " + "Forced to use timeChunk lock.");
    }

    this.authorizerMapper = toolbox.getAuthorizerMapper();
    toolbox.getChatHandlerProvider().register(getId(), this, false);

    rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
    parseExceptionHandler = new ParseExceptionHandler(
        rowIngestionMeters,
        ingestionSchema.getTuningConfig().isLogParseExceptions(),
        ingestionSchema.getTuningConfig().getMaxParseExceptions(),
        ingestionSchema.getTuningConfig().getMaxSavedParseExceptions()
    );

    final InputSource source = ingestionSchema.getIOConfig().getNonNullInputSource(
        ingestionSchema.getDataSchema().getParser()
    );
    final ParallelIndexSupervisorTaskClient supervisorClient = toolbox.getSupervisorTaskClientFactory().build(
        new ClientBasedTaskInfoProvider(toolbox.getIndexingServiceClient()),
        getId(),
        // always use a single http thread
        1,
        ingestionSchema.getTuningConfig().getChatHandlerTimeout(),
        ingestionSchema.getTuningConfig().getChatHandlerNumRetries()
    );

    ingestionState = IngestionState.BUILD_SEGMENTS;
    final Set<DataSegment> pushedSegments = generateAndPushSegments(
        toolbox,
        supervisorClient,
        source,
        toolbox.getIndexingTmpDir()
    );

    // Combine the locked existing segments with the newly pushed ones in a single timeline so
    // that the fully overshadowed segments can be looked up.
    final Set<DataSegment> lockedAndPushedSegments = new HashSet<>(getTaskLockHelper().getLockedExistingSegments());
    lockedAndPushedSegments.addAll(pushedSegments);
    final VersionedIntervalTimeline<String, DataSegment> segmentTimeline =
        VersionedIntervalTimeline.forSegments(lockedAndPushedSegments);
    final Set<DataSegment> overshadowedSegments = FluentIterable
        .from(segmentTimeline.findFullyOvershadowed())
        .transformAndConcat(TimelineObjectHolder::getObject)
        .transform(PartitionChunk::getObject)
        .toSet();

    // The same completion reports are sent in the pushed-segments report and written via the toolbox.
    final Map<String, TaskReport> completionReports = getTaskCompletionReports();
    supervisorClient.report(
        supervisorTaskId,
        new PushedSegmentsReport(getId(), overshadowedSegments, pushedSegments, completionReports)
    );
    toolbox.getTaskReportFileWriter().write(getId(), completionReports);

    return TaskStatus.success(getId());
  } catch (Exception ex) {
    LOG.error(ex, "Encountered exception in parallel sub task.");
    // errorMsg is assigned before the completion reports are fetched and written.
    errorMsg = Throwables.getStackTraceAsString(ex);
    toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
    return TaskStatus.failure(getId(), errorMsg);
  } finally {
    toolbox.getChatHandlerProvider().unregister(getId());
  }
}
Aggregations