Use of org.apache.druid.indexing.common.TaskReport in project druid by druid-io.
From the class ParallelIndexSupervisorTask, method doGetRowStatsAndUnparseableEventsParallelSinglePhase:
private Pair<Map<String, Object>, Map<String, Object>> doGetRowStatsAndUnparseableEventsParallelSinglePhase(
    SinglePhaseParallelIndexTaskRunner parallelSinglePhaseRunner,
    boolean includeUnparseable
)
{
  long processed = 0L;
  long processedWithError = 0L;
  long thrownAway = 0L;
  long unparseable = 0L;
  List<ParseExceptionReport> unparseableEvents = new ArrayList<>();

  // Get stats from completed tasks
  Map<String, PushedSegmentsReport> completedSubtaskReports = parallelSinglePhaseRunner.getReports();
  for (PushedSegmentsReport pushedSegmentsReport : completedSubtaskReports.values()) {
    Map<String, TaskReport> taskReport = pushedSegmentsReport.getTaskReport();
    if (taskReport == null || taskReport.isEmpty()) {
      LOG.warn("Got an empty task report from subtask: " + pushedSegmentsReport.getTaskId());
      continue;
    }
    IngestionStatsAndErrorsTaskReport ingestionStatsAndErrorsReport =
        (IngestionStatsAndErrorsTaskReport) taskReport.get(IngestionStatsAndErrorsTaskReport.REPORT_KEY);
    IngestionStatsAndErrorsTaskReportData reportData =
        (IngestionStatsAndErrorsTaskReportData) ingestionStatsAndErrorsReport.getPayload();
    RowIngestionMetersTotals totals = getTotalsFromBuildSegmentsRowStats(
        reportData.getRowStats().get(RowIngestionMeters.BUILD_SEGMENTS)
    );
    if (includeUnparseable) {
      List<ParseExceptionReport> taskUnparsebleEvents =
          (List<ParseExceptionReport>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);
      unparseableEvents.addAll(taskUnparsebleEvents);
    }
    processed += totals.getProcessed();
    processedWithError += totals.getProcessedWithError();
    thrownAway += totals.getThrownAway();
    unparseable += totals.getUnparseable();
  }

  // Get stats from running tasks
  Set<String> runningTaskIds = parallelSinglePhaseRunner.getRunningTaskIds();
  for (String runningTaskId : runningTaskIds) {
    try {
      Map<String, Object> report = toolbox.getIndexingServiceClient().getTaskReport(runningTaskId);
      if (report == null || report.isEmpty()) {
        // task does not have a running report yet
        continue;
      }
      Map<String, Object> ingestionStatsAndErrors = (Map<String, Object>) report.get("ingestionStatsAndErrors");
      Map<String, Object> payload = (Map<String, Object>) ingestionStatsAndErrors.get("payload");
      Map<String, Object> rowStats = (Map<String, Object>) payload.get("rowStats");
      Map<String, Object> totals = (Map<String, Object>) rowStats.get("totals");
      Map<String, Object> buildSegments = (Map<String, Object>) totals.get(RowIngestionMeters.BUILD_SEGMENTS);
      if (includeUnparseable) {
        Map<String, Object> taskUnparseableEvents = (Map<String, Object>) payload.get("unparseableEvents");
        List<ParseExceptionReport> buildSegmentsUnparseableEvents =
            (List<ParseExceptionReport>) taskUnparseableEvents.get(RowIngestionMeters.BUILD_SEGMENTS);
        unparseableEvents.addAll(buildSegmentsUnparseableEvents);
      }
      processed += ((Number) buildSegments.get("processed")).longValue();
      processedWithError += ((Number) buildSegments.get("processedWithError")).longValue();
      thrownAway += ((Number) buildSegments.get("thrownAway")).longValue();
      unparseable += ((Number) buildSegments.get("unparseable")).longValue();
    }
    catch (Exception e) {
      LOG.warn(e, "Encountered exception when getting live subtask report for task: " + runningTaskId);
    }
  }

  Map<String, Object> rowStatsMap = new HashMap<>();
  Map<String, Object> totalsMap = new HashMap<>();
  totalsMap.put(
      RowIngestionMeters.BUILD_SEGMENTS,
      new RowIngestionMetersTotals(processed, processedWithError, thrownAway, unparseable)
  );
  rowStatsMap.put("totals", totalsMap);
  return Pair.of(rowStatsMap, ImmutableMap.of(RowIngestionMeters.BUILD_SEGMENTS, unparseableEvents));
}
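Both loops above reduce per-subtask BUILD_SEGMENTS counters into a single RowIngestionMetersTotals. The summation step can be isolated into a small helper; this is a minimal sketch, not part of ParallelIndexSupervisorTask, and the name sumBuildSegmentsTotals is hypothetical. It uses only the getters and the constructor already seen in the method above.

// Hypothetical helper: folds per-subtask totals into one RowIngestionMetersTotals,
// mirroring the accumulation done in doGetRowStatsAndUnparseableEventsParallelSinglePhase.
private static RowIngestionMetersTotals sumBuildSegmentsTotals(List<RowIngestionMetersTotals> subtaskTotals)
{
  long processed = 0L;
  long processedWithError = 0L;
  long thrownAway = 0L;
  long unparseable = 0L;
  for (RowIngestionMetersTotals totals : subtaskTotals) {
    processed += totals.getProcessed();
    processedWithError += totals.getProcessedWithError();
    thrownAway += totals.getThrownAway();
    unparseable += totals.getUnparseable();
  }
  return new RowIngestionMetersTotals(processed, processedWithError, thrownAway, unparseable);
}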
Use of org.apache.druid.indexing.common.TaskReport in project druid by druid-io.
From the class AbstractITBatchIndexTest, method submitTaskAndWait:
protected void submitTaskAndWait(
    String taskSpec,
    String dataSourceName,
    boolean waitForNewVersion,
    boolean waitForSegmentsToLoad,
    Pair<Boolean, Boolean> segmentAvailabilityConfirmationPair
)
{
  final List<DataSegment> oldVersions = waitForNewVersion ? coordinator.getAvailableSegments(dataSourceName) : null;

  long startSubTaskCount = -1;
  final boolean assertRunsSubTasks = taskSpec.contains("index_parallel");
  if (assertRunsSubTasks) {
    startSubTaskCount = countCompleteSubTasks(dataSourceName, !taskSpec.contains("dynamic"));
  }

  final String taskID = indexer.submitTask(taskSpec);
  LOG.info("TaskID for loading index task %s", taskID);
  indexer.waitUntilTaskCompletes(taskID);

  if (assertRunsSubTasks) {
    final boolean perfectRollup = !taskSpec.contains("dynamic");
    final long newSubTasks = countCompleteSubTasks(dataSourceName, perfectRollup) - startSubTaskCount;
    Assert.assertTrue(
        newSubTasks > 0,
        StringUtils.format(
            "The supervisor task[%s] didn't create any sub tasks. Was it executed in the parallel mode?",
            taskID
        )
    );
  }

  if (segmentAvailabilityConfirmationPair.lhs != null && segmentAvailabilityConfirmationPair.lhs) {
    TaskReport reportRaw = indexer.getTaskReport(taskID).get("ingestionStatsAndErrors");
    IngestionStatsAndErrorsTaskReport report = (IngestionStatsAndErrorsTaskReport) reportRaw;
    IngestionStatsAndErrorsTaskReportData reportData = (IngestionStatsAndErrorsTaskReportData) report.getPayload();

    // Confirm that the task waited longer than 0 ms for segment availability.
    Assert.assertTrue(reportData.getSegmentAvailabilityWaitTimeMs() > 0);

    // Make sure that the result of waiting for segments to load matches the expected result.
    if (segmentAvailabilityConfirmationPair.rhs != null) {
      Assert.assertEquals(
          Boolean.valueOf(reportData.isSegmentAvailabilityConfirmed()),
          segmentAvailabilityConfirmationPair.rhs
      );
    }
  }

  // Wait until the coordinator sees a new segment version; otherwise the segment-load check below
  // could pass early merely because the original segments have loaded.
  if (waitForNewVersion) {
    ITRetryUtil.retryUntilTrue(
        () -> {
          final VersionedIntervalTimeline<String, DataSegment> timeline =
              VersionedIntervalTimeline.forSegments(coordinator.getAvailableSegments(dataSourceName));
          final List<TimelineObjectHolder<String, DataSegment>> holders = timeline.lookup(Intervals.ETERNITY);
          return FluentIterable
              .from(holders)
              .transformAndConcat(TimelineObjectHolder::getObject)
              .anyMatch(chunk -> FluentIterable.from(oldVersions).anyMatch(oldSegment -> chunk.getObject().overshadows(oldSegment)));
        },
        "See a new version"
    );
  }

  if (waitForSegmentsToLoad) {
    ITRetryUtil.retryUntilTrue(() -> coordinator.areSegmentsLoaded(dataSourceName), "Segment Load");
  }
}
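Beyond the availability checks, the same ingestionStatsAndErrors report can be used to inspect row counts once the task has completed. The following is a short sketch, not part of AbstractITBatchIndexTest, reusing only calls shown on this page (indexer.getTaskReport, getPayload, getRowStats); the assertion simply assumes a completed batch task reports BUILD_SEGMENTS stats.

// Illustrative follow-up check: fetch the completed task's report and look at its BUILD_SEGMENTS row stats.
TaskReport rawReport = indexer.getTaskReport(taskID).get("ingestionStatsAndErrors");
IngestionStatsAndErrorsTaskReport statsReport = (IngestionStatsAndErrorsTaskReport) rawReport;
IngestionStatsAndErrorsTaskReportData payload = (IngestionStatsAndErrorsTaskReportData) statsReport.getPayload();

// rowStats is keyed by ingestion phase; BUILD_SEGMENTS holds the totals for segment generation.
Object buildSegmentsStats = payload.getRowStats().get(RowIngestionMeters.BUILD_SEGMENTS);
Assert.assertNotNull(buildSegmentsStats);
LOG.info("BUILD_SEGMENTS row stats for task %s: %s", taskID, buildSegmentsStats);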
Use of org.apache.druid.indexing.common.TaskReport in project druid by druid-io.
From the class SinglePhaseSubTask, method runTask:
@Override
public TaskStatus runTask(final TaskToolbox toolbox)
{
  try {
    if (missingIntervalsInOverwriteMode) {
      LOG.warn(
          "Intervals are missing in granularitySpec while this task is potentially overwriting existing segments. "
          + "Forced to use timeChunk lock."
      );
    }
    this.authorizerMapper = toolbox.getAuthorizerMapper();
    toolbox.getChatHandlerProvider().register(getId(), this, false);

    rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
    parseExceptionHandler = new ParseExceptionHandler(
        rowIngestionMeters,
        ingestionSchema.getTuningConfig().isLogParseExceptions(),
        ingestionSchema.getTuningConfig().getMaxParseExceptions(),
        ingestionSchema.getTuningConfig().getMaxSavedParseExceptions()
    );

    final InputSource inputSource =
        ingestionSchema.getIOConfig().getNonNullInputSource(ingestionSchema.getDataSchema().getParser());
    final ParallelIndexSupervisorTaskClient taskClient = toolbox.getSupervisorTaskClientFactory().build(
        new ClientBasedTaskInfoProvider(toolbox.getIndexingServiceClient()),
        getId(),
        1, // always use a single http thread
        ingestionSchema.getTuningConfig().getChatHandlerTimeout(),
        ingestionSchema.getTuningConfig().getChatHandlerNumRetries()
    );

    ingestionState = IngestionState.BUILD_SEGMENTS;
    final Set<DataSegment> pushedSegments = generateAndPushSegments(
        toolbox,
        taskClient,
        inputSource,
        toolbox.getIndexingTmpDir()
    );

    // Find inputSegments overshadowed by pushedSegments
    final Set<DataSegment> allSegments = new HashSet<>(getTaskLockHelper().getLockedExistingSegments());
    allSegments.addAll(pushedSegments);
    final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(allSegments);
    final Set<DataSegment> oldSegments = FluentIterable
        .from(timeline.findFullyOvershadowed())
        .transformAndConcat(TimelineObjectHolder::getObject)
        .transform(PartitionChunk::getObject)
        .toSet();

    Map<String, TaskReport> taskReport = getTaskCompletionReports();
    taskClient.report(supervisorTaskId, new PushedSegmentsReport(getId(), oldSegments, pushedSegments, taskReport));

    toolbox.getTaskReportFileWriter().write(getId(), taskReport);
    return TaskStatus.success(getId());
  }
  catch (Exception e) {
    LOG.error(e, "Encountered exception in parallel sub task.");
    errorMsg = Throwables.getStackTraceAsString(e);
    toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
    return TaskStatus.failure(getId(), errorMsg);
  }
  finally {
    toolbox.getChatHandlerProvider().unregister(getId());
  }
}
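The report map handed to both taskClient.report(...) and the report file writer comes from getTaskCompletionReports(), which is not shown on this page. The following is a plausible reconstruction only, assembled from the constructors used in the serde test below; it assumes RowIngestionMeters exposes getTotals(), and it leaves the unparseable-event map empty rather than guessing at the real collection logic.

// Sketch of what getTaskCompletionReports() could look like; not the verbatim Druid implementation.
private Map<String, TaskReport> getTaskCompletionReports()
{
  return TaskReport.buildTaskReports(
      new IngestionStatsAndErrorsTaskReport(
          getId(),
          new IngestionStatsAndErrorsTaskReportData(
              ingestionState,
              ImmutableMap.of(), // unparseable events omitted in this sketch
              ImmutableMap.of(
                  RowIngestionMeters.BUILD_SEGMENTS,
                  rowIngestionMeters.getTotals() // assumes RowIngestionMeters#getTotals()
              ),
              errorMsg,
              false, // availability confirmation is handled by the supervisor task
              0L
          )
      )
  );
}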
Use of org.apache.druid.indexing.common.TaskReport in project druid by druid-io.
From the class TaskReportSerdeTest, method testSerde:
@Test
public void testSerde() throws Exception
{
  IngestionStatsAndErrorsTaskReport report1 = new IngestionStatsAndErrorsTaskReport(
      "testID",
      new IngestionStatsAndErrorsTaskReportData(
          IngestionState.BUILD_SEGMENTS,
          ImmutableMap.of("hello", "world"),
          ImmutableMap.of("number", 1234),
          "an error message",
          true,
          1000L
      )
  );
  String report1serialized = jsonMapper.writeValueAsString(report1);
  IngestionStatsAndErrorsTaskReport report2 =
      jsonMapper.readValue(report1serialized, IngestionStatsAndErrorsTaskReport.class);
  Assert.assertEquals(report1, report2);
  Assert.assertEquals(report1.hashCode(), report2.hashCode());

  Map<String, TaskReport> reportMap1 = TaskReport.buildTaskReports(report1);
  String reportMapSerialized = jsonMapper.writeValueAsString(reportMap1);
  Map<String, TaskReport> reportMap2 = jsonMapper.readValue(
      reportMapSerialized,
      new TypeReference<Map<String, TaskReport>>() {}
  );
  Assert.assertEquals(reportMap1, reportMap2);
}
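A consumer of the round-tripped map would unwrap the report the same way the supervisor task does above. This short sketch continues from reportMap2 and uses only the REPORT_KEY lookup and payload getters already shown on this page; the expected values mirror the constructor arguments in the test.

// Unwrap the deserialized report and check that its payload survived the round trip.
IngestionStatsAndErrorsTaskReport roundTripped =
    (IngestionStatsAndErrorsTaskReport) reportMap2.get(IngestionStatsAndErrorsTaskReport.REPORT_KEY);
IngestionStatsAndErrorsTaskReportData payload =
    (IngestionStatsAndErrorsTaskReportData) roundTripped.getPayload();

Assert.assertEquals(ImmutableMap.of("number", 1234), payload.getRowStats());
Assert.assertEquals(1000L, payload.getSegmentAvailabilityWaitTimeMs());
Assert.assertTrue(payload.isSegmentAvailabilityConfirmed());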