
Example 1 with TaskReport

Use of org.apache.druid.indexing.common.TaskReport in project druid by druid-io.

From the class ParallelIndexSupervisorTask, the method doGetRowStatsAndUnparseableEventsParallelSinglePhase, which aggregates row statistics and unparseable events from both completed and running subtasks:

private Pair<Map<String, Object>, Map<String, Object>> doGetRowStatsAndUnparseableEventsParallelSinglePhase(SinglePhaseParallelIndexTaskRunner parallelSinglePhaseRunner, boolean includeUnparseable) {
    long processed = 0L;
    long processedWithError = 0L;
    long thrownAway = 0L;
    long unparseable = 0L;
    List<ParseExceptionReport> unparseableEvents = new ArrayList<>();
    // Get stats from completed tasks
    Map<String, PushedSegmentsReport> completedSubtaskReports = parallelSinglePhaseRunner.getReports();
    for (PushedSegmentsReport pushedSegmentsReport : completedSubtaskReports.values()) {
        Map<String, TaskReport> taskReport = pushedSegmentsReport.getTaskReport();
        if (taskReport == null || taskReport.isEmpty()) {
            LOG.warn("Got an empty task report from subtask: " + pushedSegmentsReport.getTaskId());
            continue;
        }
        IngestionStatsAndErrorsTaskReport ingestionStatsAndErrorsReport = (IngestionStatsAndErrorsTaskReport) taskReport.get(IngestionStatsAndErrorsTaskReport.REPORT_KEY);
        IngestionStatsAndErrorsTaskReportData reportData = (IngestionStatsAndErrorsTaskReportData) ingestionStatsAndErrorsReport.getPayload();
        RowIngestionMetersTotals totals = getTotalsFromBuildSegmentsRowStats(reportData.getRowStats().get(RowIngestionMeters.BUILD_SEGMENTS));
        if (includeUnparseable) {
            List<ParseExceptionReport> taskUnparseableEvents = (List<ParseExceptionReport>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);
            unparseableEvents.addAll(taskUnparseableEvents);
        }
        processed += totals.getProcessed();
        processedWithError += totals.getProcessedWithError();
        thrownAway += totals.getThrownAway();
        unparseable += totals.getUnparseable();
    }
    // Get stats from running tasks
    Set<String> runningTaskIds = parallelSinglePhaseRunner.getRunningTaskIds();
    for (String runningTaskId : runningTaskIds) {
        try {
            Map<String, Object> report = toolbox.getIndexingServiceClient().getTaskReport(runningTaskId);
            if (report == null || report.isEmpty()) {
                // task does not have a running report yet
                continue;
            }
            Map<String, Object> ingestionStatsAndErrors = (Map<String, Object>) report.get("ingestionStatsAndErrors");
            Map<String, Object> payload = (Map<String, Object>) ingestionStatsAndErrors.get("payload");
            Map<String, Object> rowStats = (Map<String, Object>) payload.get("rowStats");
            Map<String, Object> totals = (Map<String, Object>) rowStats.get("totals");
            Map<String, Object> buildSegments = (Map<String, Object>) totals.get(RowIngestionMeters.BUILD_SEGMENTS);
            if (includeUnparseable) {
                Map<String, Object> taskUnparseableEvents = (Map<String, Object>) payload.get("unparseableEvents");
                List<ParseExceptionReport> buildSegmentsUnparseableEvents = (List<ParseExceptionReport>) taskUnparseableEvents.get(RowIngestionMeters.BUILD_SEGMENTS);
                unparseableEvents.addAll(buildSegmentsUnparseableEvents);
            }
            processed += ((Number) buildSegments.get("processed")).longValue();
            processedWithError += ((Number) buildSegments.get("processedWithError")).longValue();
            thrownAway += ((Number) buildSegments.get("thrownAway")).longValue();
            unparseable += ((Number) buildSegments.get("unparseable")).longValue();
        } catch (Exception e) {
            LOG.warn(e, "Encountered exception when getting live subtask report for task: " + runningTaskId);
        }
    }
    Map<String, Object> rowStatsMap = new HashMap<>();
    Map<String, Object> totalsMap = new HashMap<>();
    totalsMap.put(RowIngestionMeters.BUILD_SEGMENTS, new RowIngestionMetersTotals(processed, processedWithError, thrownAway, unparseable));
    rowStatsMap.put("totals", totalsMap);
    return Pair.of(rowStatsMap, ImmutableMap.of(RowIngestionMeters.BUILD_SEGMENTS, unparseableEvents));
}
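The returned Pair mirrors the layout parsed from the live reports: row stats on the left, unparseable events on the right. Below is a minimal sketch of unpacking the result, assuming it runs inside the same class (the method is private); the variable names are illustrative only.

Pair<Map<String, Object>, Map<String, Object>> result =
    doGetRowStatsAndUnparseableEventsParallelSinglePhase(parallelSinglePhaseRunner, true);
// Left side: {"totals": {"buildSegments": RowIngestionMetersTotals}}
Map<String, Object> totalsMap = (Map<String, Object>) result.lhs.get("totals");
RowIngestionMetersTotals totals = (RowIngestionMetersTotals) totalsMap.get(RowIngestionMeters.BUILD_SEGMENTS);
// Right side: {"buildSegments": List<ParseExceptionReport>}
List<ParseExceptionReport> events = (List<ParseExceptionReport>) result.rhs.get(RowIngestionMeters.BUILD_SEGMENTS);
LOG.info("processed[%d], unparseable[%d]", totals.getProcessed(), totals.getUnparseable());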
Also used: IngestionStatsAndErrorsTaskReport (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport), TaskReport (org.apache.druid.indexing.common.TaskReport), IngestionStatsAndErrorsTaskReportData (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData), HashMap (java.util.HashMap), ParseExceptionReport (org.apache.druid.segment.incremental.ParseExceptionReport), ArrayList (java.util.ArrayList), IOException (java.io.IOException), MaxAllowedLocksExceededException (org.apache.druid.indexing.common.task.batch.MaxAllowedLocksExceededException), List (java.util.List), RowIngestionMetersTotals (org.apache.druid.segment.incremental.RowIngestionMetersTotals), Map (java.util.Map), TreeMap (java.util.TreeMap), ImmutableMap (com.google.common.collect.ImmutableMap)

Example 2 with TaskReport

Use of org.apache.druid.indexing.common.TaskReport in project druid by druid-io.

From the class AbstractITBatchIndexTest, the method submitTaskAndWait, which submits an indexing task and verifies its completion, subtask creation, and segment availability:

protected void submitTaskAndWait(String taskSpec, String dataSourceName, boolean waitForNewVersion, boolean waitForSegmentsToLoad, Pair<Boolean, Boolean> segmentAvailabilityConfirmationPair) {
    final List<DataSegment> oldVersions = waitForNewVersion ? coordinator.getAvailableSegments(dataSourceName) : null;
    long startSubTaskCount = -1;
    final boolean assertRunsSubTasks = taskSpec.contains("index_parallel");
    if (assertRunsSubTasks) {
        startSubTaskCount = countCompleteSubTasks(dataSourceName, !taskSpec.contains("dynamic"));
    }
    final String taskID = indexer.submitTask(taskSpec);
    LOG.info("TaskID for loading index task %s", taskID);
    indexer.waitUntilTaskCompletes(taskID);
    if (assertRunsSubTasks) {
        final boolean perfectRollup = !taskSpec.contains("dynamic");
        final long newSubTasks = countCompleteSubTasks(dataSourceName, perfectRollup) - startSubTaskCount;
        Assert.assertTrue(newSubTasks > 0, StringUtils.format("The supervisor task[%s] didn't create any sub tasks. Was it executed in the parallel mode?", taskID));
    }
    if (segmentAvailabilityConfirmationPair.lhs != null && segmentAvailabilityConfirmationPair.lhs) {
        TaskReport reportRaw = indexer.getTaskReport(taskID).get("ingestionStatsAndErrors");
        IngestionStatsAndErrorsTaskReport report = (IngestionStatsAndErrorsTaskReport) reportRaw;
        IngestionStatsAndErrorsTaskReportData reportData = (IngestionStatsAndErrorsTaskReportData) report.getPayload();
        // Confirm that the task waited longer than 0 ms for its segments to become available.
        Assert.assertTrue(reportData.getSegmentAvailabilityWaitTimeMs() > 0);
        // Make sure that the result of waiting for segments to load matches the expected result
        if (segmentAvailabilityConfirmationPair.rhs != null) {
            Assert.assertEquals(Boolean.valueOf(reportData.isSegmentAvailabilityConfirmed()), segmentAvailabilityConfirmationPair.rhs);
        }
    }
    // If a new version is expected, wait for it to appear before checking segment loading; otherwise the check
    // below could pass merely because the original segments have loaded.
    if (waitForNewVersion) {
        ITRetryUtil.retryUntilTrue(() -> {
            final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(coordinator.getAvailableSegments(dataSourceName));
            final List<TimelineObjectHolder<String, DataSegment>> holders = timeline.lookup(Intervals.ETERNITY);
            return FluentIterable.from(holders).transformAndConcat(TimelineObjectHolder::getObject).anyMatch(chunk -> FluentIterable.from(oldVersions).anyMatch(oldSegment -> chunk.getObject().overshadows(oldSegment)));
        }, "See a new version");
    }
    if (waitForSegmentsToLoad) {
        ITRetryUtil.retryUntilTrue(() -> coordinator.areSegmentsLoaded(dataSourceName), "Segment Load");
    }
}
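When deeper verification is needed, the same report can be inspected further. A hedged sketch building on the lookup in the availability check above, assuming the row stats use the RowIngestionMeters.BUILD_SEGMENTS key shown in Example 1:

TaskReport reportRaw = indexer.getTaskReport(taskID).get("ingestionStatsAndErrors");
IngestionStatsAndErrorsTaskReportData reportData =
    (IngestionStatsAndErrorsTaskReportData) ((IngestionStatsAndErrorsTaskReport) reportRaw).getPayload();
// Row stats are keyed by ingestion phase; buildSegments carries the totals.
Assert.assertTrue(reportData.getRowStats().containsKey(RowIngestionMeters.BUILD_SEGMENTS));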
Also used: TaskReport (org.apache.druid.indexing.common.TaskReport), Logger (org.apache.druid.java.util.common.logger.Logger), Intervals (org.apache.druid.java.util.common.Intervals), ClientInfoResourceTestClient (org.apache.druid.testing.clients.ClientInfoResourceTestClient), Inject (com.google.inject.Inject), Function (java.util.function.Function), PartialDimensionDistributionTask (org.apache.druid.indexing.common.task.batch.parallel.PartialDimensionDistributionTask), PartialRangeSegmentGenerateTask (org.apache.druid.indexing.common.task.batch.parallel.PartialRangeSegmentGenerateTask), Pair (org.apache.druid.java.util.common.Pair), ArrayList (java.util.ArrayList), Assert (org.testng.Assert), FluentIterable (com.google.common.collect.FluentIterable), PartialDimensionCardinalityTask (org.apache.druid.indexing.common.task.batch.parallel.PartialDimensionCardinalityTask), SecondaryPartitionType (org.apache.druid.indexer.partitions.SecondaryPartitionType), IntegrationTestingConfig (org.apache.druid.testing.IntegrationTestingConfig), VersionedIntervalTimeline (org.apache.druid.timeline.VersionedIntervalTimeline), ITRetryUtil (org.apache.druid.testing.utils.ITRetryUtil), IngestionStatsAndErrorsTaskReportData (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData), StringUtils (org.apache.druid.java.util.common.StringUtils), TimelineObjectHolder (org.apache.druid.timeline.TimelineObjectHolder), PartialHashSegmentGenerateTask (org.apache.druid.indexing.common.task.batch.parallel.PartialHashSegmentGenerateTask), ISE (org.apache.druid.java.util.common.ISE), IOException (java.io.IOException), StandardCharsets (java.nio.charset.StandardCharsets), IOUtils (org.apache.commons.io.IOUtils), List (java.util.List), SinglePhaseSubTask (org.apache.druid.indexing.common.task.batch.parallel.SinglePhaseSubTask), IngestionStatsAndErrorsTaskReport (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport), PartialGenericSegmentMergeTask (org.apache.druid.indexing.common.task.batch.parallel.PartialGenericSegmentMergeTask), DataSegment (org.apache.druid.timeline.DataSegment), SqlTestQueryHelper (org.apache.druid.testing.utils.SqlTestQueryHelper), InputStream (java.io.InputStream)

Example 3 with TaskReport

Use of org.apache.druid.indexing.common.TaskReport in project druid by druid-io.

From the class SinglePhaseSubTask, the method runTask, which builds and pushes segments and then reports the results to its supervisor task:

@Override
public TaskStatus runTask(final TaskToolbox toolbox) {
    try {
        if (missingIntervalsInOverwriteMode) {
            LOG.warn("Intervals are missing in granularitySpec while this task is potentially overwriting existing segments. " + "Forced to use timeChunk lock.");
        }
        this.authorizerMapper = toolbox.getAuthorizerMapper();
        toolbox.getChatHandlerProvider().register(getId(), this, false);
        rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
        parseExceptionHandler = new ParseExceptionHandler(rowIngestionMeters, ingestionSchema.getTuningConfig().isLogParseExceptions(), ingestionSchema.getTuningConfig().getMaxParseExceptions(), ingestionSchema.getTuningConfig().getMaxSavedParseExceptions());
        final InputSource inputSource = ingestionSchema.getIOConfig().getNonNullInputSource(ingestionSchema.getDataSchema().getParser());
        final ParallelIndexSupervisorTaskClient taskClient = toolbox.getSupervisorTaskClientFactory().build(
            new ClientBasedTaskInfoProvider(toolbox.getIndexingServiceClient()),
            getId(),
            // always use a single http thread
            1,
            ingestionSchema.getTuningConfig().getChatHandlerTimeout(),
            ingestionSchema.getTuningConfig().getChatHandlerNumRetries()
        );
        ingestionState = IngestionState.BUILD_SEGMENTS;
        final Set<DataSegment> pushedSegments = generateAndPushSegments(toolbox, taskClient, inputSource, toolbox.getIndexingTmpDir());
        // Find inputSegments overshadowed by pushedSegments
        final Set<DataSegment> allSegments = new HashSet<>(getTaskLockHelper().getLockedExistingSegments());
        allSegments.addAll(pushedSegments);
        final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(allSegments);
        final Set<DataSegment> oldSegments = FluentIterable.from(timeline.findFullyOvershadowed()).transformAndConcat(TimelineObjectHolder::getObject).transform(PartitionChunk::getObject).toSet();
        Map<String, TaskReport> taskReport = getTaskCompletionReports();
        taskClient.report(supervisorTaskId, new PushedSegmentsReport(getId(), oldSegments, pushedSegments, taskReport));
        toolbox.getTaskReportFileWriter().write(getId(), taskReport);
        return TaskStatus.success(getId());
    } catch (Exception e) {
        LOG.error(e, "Encountered exception in parallel sub task.");
        errorMsg = Throwables.getStackTraceAsString(e);
        toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
        return TaskStatus.failure(getId(), errorMsg);
    } finally {
        toolbox.getChatHandlerProvider().unregister(getId());
    }
}
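getTaskCompletionReports() is not shown in this example. Here is a hedged sketch of what it could look like, assembled only from the constructors and helpers visible elsewhere on this page; the accessors getSavedParseExceptionReports() and getTotals() are assumptions, and the real Druid implementation may differ.

private Map<String, TaskReport> getTaskCompletionReports() {
    return TaskReport.buildTaskReports(
        new IngestionStatsAndErrorsTaskReport(
            getId(),
            new IngestionStatsAndErrorsTaskReportData(
                ingestionState,
                // unparseable events and row stats keyed by phase, mirroring Example 1 (accessors assumed)
                ImmutableMap.of(RowIngestionMeters.BUILD_SEGMENTS, parseExceptionHandler.getSavedParseExceptionReports()),
                ImmutableMap.of(RowIngestionMeters.BUILD_SEGMENTS, rowIngestionMeters.getTotals()),
                errorMsg,
                // a subtask neither confirms segment availability nor waits for it
                false,
                0L
            )
        )
    );
}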
Also used: InputSource (org.apache.druid.data.input.InputSource), TaskReport (org.apache.druid.indexing.common.TaskReport), IngestionStatsAndErrorsTaskReport (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport), DataSegment (org.apache.druid.timeline.DataSegment), TimeoutException (java.util.concurrent.TimeoutException), IOException (java.io.IOException), ExecutionException (java.util.concurrent.ExecutionException), TimelineObjectHolder (org.apache.druid.timeline.TimelineObjectHolder), ParseExceptionHandler (org.apache.druid.segment.incremental.ParseExceptionHandler), ClientBasedTaskInfoProvider (org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider), HashSet (java.util.HashSet)

Example 4 with TaskReport

Use of org.apache.druid.indexing.common.TaskReport in project druid by druid-io.

From the class TaskReportSerdeTest, the method testSerde, which round-trips an IngestionStatsAndErrorsTaskReport through JSON serialization:

@Test
public void testSerde() throws Exception {
    IngestionStatsAndErrorsTaskReport report1 = new IngestionStatsAndErrorsTaskReport("testID", new IngestionStatsAndErrorsTaskReportData(IngestionState.BUILD_SEGMENTS, ImmutableMap.of("hello", "world"), ImmutableMap.of("number", 1234), "an error message", true, 1000L));
    String report1serialized = jsonMapper.writeValueAsString(report1);
    IngestionStatsAndErrorsTaskReport report2 = jsonMapper.readValue(report1serialized, IngestionStatsAndErrorsTaskReport.class);
    Assert.assertEquals(report1, report2);
    Assert.assertEquals(report1.hashCode(), report2.hashCode());
    Map<String, TaskReport> reportMap1 = TaskReport.buildTaskReports(report1);
    String reportMapSerialized = jsonMapper.writeValueAsString(reportMap1);
    Map<String, TaskReport> reportMap2 = jsonMapper.readValue(reportMapSerialized, new TypeReference<Map<String, TaskReport>>() {
    });
    Assert.assertEquals(reportMap1, reportMap2);
}
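A small follow-on check, assuming buildTaskReports keys each report by its report key; that key is the same "ingestionStatsAndErrors" string looked up via IngestionStatsAndErrorsTaskReport.REPORT_KEY in Example 1:

Map<String, TaskReport> reports = TaskReport.buildTaskReports(report1);
// The map is keyed by report key, so the same lookup used by the supervisor works here.
TaskReport fetched = reports.get(IngestionStatsAndErrorsTaskReport.REPORT_KEY);
Assert.assertSame(report1, fetched);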
Also used: IngestionStatsAndErrorsTaskReport (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport), TaskReport (org.apache.druid.indexing.common.TaskReport), IngestionStatsAndErrorsTaskReportData (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData), ImmutableMap (com.google.common.collect.ImmutableMap), Map (java.util.Map), Test (org.junit.Test)

Aggregations

IngestionStatsAndErrorsTaskReport (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport): 4
TaskReport (org.apache.druid.indexing.common.TaskReport): 4
IOException (java.io.IOException): 3
IngestionStatsAndErrorsTaskReportData (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData): 3
ImmutableMap (com.google.common.collect.ImmutableMap): 2
ArrayList (java.util.ArrayList): 2
List (java.util.List): 2
Map (java.util.Map): 2
DataSegment (org.apache.druid.timeline.DataSegment): 2
TimelineObjectHolder (org.apache.druid.timeline.TimelineObjectHolder): 2
FluentIterable (com.google.common.collect.FluentIterable): 1
Inject (com.google.inject.Inject): 1
InputStream (java.io.InputStream): 1
StandardCharsets (java.nio.charset.StandardCharsets): 1
HashMap (java.util.HashMap): 1
HashSet (java.util.HashSet): 1
TreeMap (java.util.TreeMap): 1
ExecutionException (java.util.concurrent.ExecutionException): 1
TimeoutException (java.util.concurrent.TimeoutException): 1
Function (java.util.function.Function): 1