
Example 1 with IngestionStatsAndErrorsTaskReport

Use of org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport in project druid by druid-io.

From class ParallelIndexSupervisorTask, method doGetRowStatsAndUnparseableEventsParallelSinglePhase:

private Pair<Map<String, Object>, Map<String, Object>> doGetRowStatsAndUnparseableEventsParallelSinglePhase(SinglePhaseParallelIndexTaskRunner parallelSinglePhaseRunner, boolean includeUnparseable) {
    long processed = 0L;
    long processedWithError = 0L;
    long thrownAway = 0L;
    long unparseable = 0L;
    List<ParseExceptionReport> unparseableEvents = new ArrayList<>();
    // Get stats from completed tasks
    Map<String, PushedSegmentsReport> completedSubtaskReports = parallelSinglePhaseRunner.getReports();
    for (PushedSegmentsReport pushedSegmentsReport : completedSubtaskReports.values()) {
        Map<String, TaskReport> taskReport = pushedSegmentsReport.getTaskReport();
        if (taskReport == null || taskReport.isEmpty()) {
            LOG.warn("Got an empty task report from subtask: " + pushedSegmentsReport.getTaskId());
            continue;
        }
        IngestionStatsAndErrorsTaskReport ingestionStatsAndErrorsReport = (IngestionStatsAndErrorsTaskReport) taskReport.get(IngestionStatsAndErrorsTaskReport.REPORT_KEY);
        IngestionStatsAndErrorsTaskReportData reportData = (IngestionStatsAndErrorsTaskReportData) ingestionStatsAndErrorsReport.getPayload();
        RowIngestionMetersTotals totals = getTotalsFromBuildSegmentsRowStats(reportData.getRowStats().get(RowIngestionMeters.BUILD_SEGMENTS));
        if (includeUnparseable) {
            List<ParseExceptionReport> taskUnparseableEvents = (List<ParseExceptionReport>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);
            unparseableEvents.addAll(taskUnparseableEvents);
        }
        processed += totals.getProcessed();
        processedWithError += totals.getProcessedWithError();
        thrownAway += totals.getThrownAway();
        unparseable += totals.getUnparseable();
    }
    // Get stats from running tasks
    Set<String> runningTaskIds = parallelSinglePhaseRunner.getRunningTaskIds();
    for (String runningTaskId : runningTaskIds) {
        try {
            Map<String, Object> report = toolbox.getIndexingServiceClient().getTaskReport(runningTaskId);
            if (report == null || report.isEmpty()) {
                // task does not have a running report yet
                continue;
            }
            Map<String, Object> ingestionStatsAndErrors = (Map<String, Object>) report.get("ingestionStatsAndErrors");
            Map<String, Object> payload = (Map<String, Object>) ingestionStatsAndErrors.get("payload");
            Map<String, Object> rowStats = (Map<String, Object>) payload.get("rowStats");
            Map<String, Object> totals = (Map<String, Object>) rowStats.get("totals");
            Map<String, Object> buildSegments = (Map<String, Object>) totals.get(RowIngestionMeters.BUILD_SEGMENTS);
            if (includeUnparseable) {
                Map<String, Object> taskUnparseableEvents = (Map<String, Object>) payload.get("unparseableEvents");
                List<ParseExceptionReport> buildSegmentsUnparseableEvents = (List<ParseExceptionReport>) taskUnparseableEvents.get(RowIngestionMeters.BUILD_SEGMENTS);
                unparseableEvents.addAll(buildSegmentsUnparseableEvents);
            }
            processed += ((Number) buildSegments.get("processed")).longValue();
            processedWithError += ((Number) buildSegments.get("processedWithError")).longValue();
            thrownAway += ((Number) buildSegments.get("thrownAway")).longValue();
            unparseable += ((Number) buildSegments.get("unparseable")).longValue();
        } catch (Exception e) {
            LOG.warn(e, "Encountered exception when getting live subtask report for task: " + runningTaskId);
        }
    }
    Map<String, Object> rowStatsMap = new HashMap<>();
    Map<String, Object> totalsMap = new HashMap<>();
    totalsMap.put(RowIngestionMeters.BUILD_SEGMENTS, new RowIngestionMetersTotals(processed, processedWithError, thrownAway, unparseable));
    rowStatsMap.put("totals", totalsMap);
    return Pair.of(rowStatsMap, ImmutableMap.of(RowIngestionMeters.BUILD_SEGMENTS, unparseableEvents));
}
Also used: IngestionStatsAndErrorsTaskReport (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport), TaskReport (org.apache.druid.indexing.common.TaskReport), IngestionStatsAndErrorsTaskReportData (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData), HashMap (java.util.HashMap), ParseExceptionReport (org.apache.druid.segment.incremental.ParseExceptionReport), ArrayList (java.util.ArrayList), IOException (java.io.IOException), MaxAllowedLocksExceededException (org.apache.druid.indexing.common.task.batch.MaxAllowedLocksExceededException), List (java.util.List), RowIngestionMetersTotals (org.apache.druid.segment.incremental.RowIngestionMetersTotals), Map (java.util.Map), TreeMap (java.util.TreeMap), ImmutableMap (com.google.common.collect.ImmutableMap)
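
Note that the running-task branch above walks report -> "ingestionStatsAndErrors" -> "payload" -> "rowStats" -> "totals" with unchecked casts, so a report missing any level throws and lands in the catch-all warning. A minimal defensive alternative is sketched below; the getChild helper is hypothetical and not part of Druid:

@SuppressWarnings("unchecked")
private static Map<String, Object> getChild(Map<String, Object> parent, String key) {
    // Return null instead of throwing when a level of the nested report map
    // is absent or has an unexpected type.
    final Object child = parent == null ? null : parent.get(key);
    return child instanceof Map ? (Map<String, Object>) child : null;
}

With such a helper, each level of the live report can be null-checked and the subtask skipped cleanly, rather than treating a half-built report as an exception to log.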

Example 2 with IngestionStatsAndErrorsTaskReport

Use of org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport in project druid by druid-io.

From class AbstractITBatchIndexTest, method submitTaskAndWait:

protected void submitTaskAndWait(String taskSpec, String dataSourceName, boolean waitForNewVersion, boolean waitForSegmentsToLoad, Pair<Boolean, Boolean> segmentAvailabilityConfirmationPair) {
    final List<DataSegment> oldVersions = waitForNewVersion ? coordinator.getAvailableSegments(dataSourceName) : null;
    long startSubTaskCount = -1;
    final boolean assertRunsSubTasks = taskSpec.contains("index_parallel");
    if (assertRunsSubTasks) {
        startSubTaskCount = countCompleteSubTasks(dataSourceName, !taskSpec.contains("dynamic"));
    }
    final String taskID = indexer.submitTask(taskSpec);
    LOG.info("TaskID for loading index task %s", taskID);
    indexer.waitUntilTaskCompletes(taskID);
    if (assertRunsSubTasks) {
        final boolean perfectRollup = !taskSpec.contains("dynamic");
        final long newSubTasks = countCompleteSubTasks(dataSourceName, perfectRollup) - startSubTaskCount;
        Assert.assertTrue(newSubTasks > 0, StringUtils.format("The supervisor task[%s] didn't create any sub tasks. Was it executed in the parallel mode?", taskID));
    }
    if (segmentAvailabilityConfirmationPair.lhs != null && segmentAvailabilityConfirmationPair.lhs) {
        TaskReport reportRaw = indexer.getTaskReport(taskID).get("ingestionStatsAndErrors");
        IngestionStatsAndErrorsTaskReport report = (IngestionStatsAndErrorsTaskReport) reportRaw;
        IngestionStatsAndErrorsTaskReportData reportData = (IngestionStatsAndErrorsTaskReportData) report.getPayload();
        // Confirm that the task waited longer than 0 ms for segments to become available.
        Assert.assertTrue(reportData.getSegmentAvailabilityWaitTimeMs() > 0);
        // Make sure that the result of waiting for segments to load matches the expected result
        if (segmentAvailabilityConfirmationPair.rhs != null) {
            Assert.assertEquals(Boolean.valueOf(reportData.isSegmentAvailabilityConfirmed()), segmentAvailabilityConfirmationPair.rhs);
        }
    }
    // Wait for a new overshadowing version before checking segment load; otherwise the
    // check below could pass merely because the original segments have loaded.
    if (waitForNewVersion) {
        ITRetryUtil.retryUntilTrue(() -> {
            final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(coordinator.getAvailableSegments(dataSourceName));
            final List<TimelineObjectHolder<String, DataSegment>> holders = timeline.lookup(Intervals.ETERNITY);
            return FluentIterable.from(holders).transformAndConcat(TimelineObjectHolder::getObject).anyMatch(chunk -> FluentIterable.from(oldVersions).anyMatch(oldSegment -> chunk.getObject().overshadows(oldSegment)));
        }, "See a new version");
    }
    if (waitForSegmentsToLoad) {
        ITRetryUtil.retryUntilTrue(() -> coordinator.areSegmentsLoaded(dataSourceName), "Segment Load");
    }
}
Also used: TaskReport (org.apache.druid.indexing.common.TaskReport), Logger (org.apache.druid.java.util.common.logger.Logger), Intervals (org.apache.druid.java.util.common.Intervals), ClientInfoResourceTestClient (org.apache.druid.testing.clients.ClientInfoResourceTestClient), Inject (com.google.inject.Inject), Function (java.util.function.Function), PartialDimensionDistributionTask (org.apache.druid.indexing.common.task.batch.parallel.PartialDimensionDistributionTask), PartialRangeSegmentGenerateTask (org.apache.druid.indexing.common.task.batch.parallel.PartialRangeSegmentGenerateTask), Pair (org.apache.druid.java.util.common.Pair), ArrayList (java.util.ArrayList), Assert (org.testng.Assert), FluentIterable (com.google.common.collect.FluentIterable), PartialDimensionCardinalityTask (org.apache.druid.indexing.common.task.batch.parallel.PartialDimensionCardinalityTask), SecondaryPartitionType (org.apache.druid.indexer.partitions.SecondaryPartitionType), IntegrationTestingConfig (org.apache.druid.testing.IntegrationTestingConfig), VersionedIntervalTimeline (org.apache.druid.timeline.VersionedIntervalTimeline), ITRetryUtil (org.apache.druid.testing.utils.ITRetryUtil), IngestionStatsAndErrorsTaskReportData (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData), StringUtils (org.apache.druid.java.util.common.StringUtils), TimelineObjectHolder (org.apache.druid.timeline.TimelineObjectHolder), PartialHashSegmentGenerateTask (org.apache.druid.indexing.common.task.batch.parallel.PartialHashSegmentGenerateTask), ISE (org.apache.druid.java.util.common.ISE), IOException (java.io.IOException), StandardCharsets (java.nio.charset.StandardCharsets), IOUtils (org.apache.commons.io.IOUtils), List (java.util.List), SinglePhaseSubTask (org.apache.druid.indexing.common.task.batch.parallel.SinglePhaseSubTask), IngestionStatsAndErrorsTaskReport (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport), PartialGenericSegmentMergeTask (org.apache.druid.indexing.common.task.batch.parallel.PartialGenericSegmentMergeTask), DataSegment (org.apache.druid.timeline.DataSegment), SqlTestQueryHelper (org.apache.druid.testing.utils.SqlTestQueryHelper), InputStream (java.io.InputStream)
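
For reference, the availability pair drives two separate assertions: lhs switches the check on, and rhs is the expected value of isSegmentAvailabilityConfirmed(). A hypothetical call site (the spec and datasource names are invented for illustration) could look like this:

// Hypothetical invocation: assert that the task waited for segment
// availability (lhs = true) and that the wait succeeded (rhs = true).
submitTaskAndWait(
    taskSpec,
    "wikipedia_index_test",
    false,                  // waitForNewVersion
    true,                   // waitForSegmentsToLoad
    new Pair<>(true, true)  // segmentAvailabilityConfirmationPair
);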

Example 3 with IngestionStatsAndErrorsTaskReport

Use of org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport in project druid by druid-io.

From class TaskReportSerdeTest, method testSerde:

@Test
public void testSerde() throws Exception {
    IngestionStatsAndErrorsTaskReport report1 = new IngestionStatsAndErrorsTaskReport("testID", new IngestionStatsAndErrorsTaskReportData(IngestionState.BUILD_SEGMENTS, ImmutableMap.of("hello", "world"), ImmutableMap.of("number", 1234), "an error message", true, 1000L));
    String report1serialized = jsonMapper.writeValueAsString(report1);
    IngestionStatsAndErrorsTaskReport report2 = jsonMapper.readValue(report1serialized, IngestionStatsAndErrorsTaskReport.class);
    Assert.assertEquals(report1, report2);
    Assert.assertEquals(report1.hashCode(), report2.hashCode());
    Map<String, TaskReport> reportMap1 = TaskReport.buildTaskReports(report1);
    String reportMapSerialized = jsonMapper.writeValueAsString(reportMap1);
    Map<String, TaskReport> reportMap2 = jsonMapper.readValue(reportMapSerialized, new TypeReference<Map<String, TaskReport>>() {
    });
    Assert.assertEquals(reportMap1, reportMap2);
}
Also used: IngestionStatsAndErrorsTaskReport (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport), TaskReport (org.apache.druid.indexing.common.TaskReport), IngestionStatsAndErrorsTaskReportData (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData), ImmutableMap (com.google.common.collect.ImmutableMap), Map (java.util.Map), Test (org.junit.Test)
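
After the round trip, the payload can be unpacked the same way Example 1 unpacks completed subtask reports. A short sketch, assuming the standard getters on IngestionStatsAndErrorsTaskReportData (getSegmentAvailabilityWaitTimeMs appears in Example 2; getIngestionState is assumed to exist as the matching getter):

// Pull the report back out of the deserialized map by its well-known key
// and check two payload fields against the constructor arguments above.
IngestionStatsAndErrorsTaskReport roundTripped =
    (IngestionStatsAndErrorsTaskReport) reportMap2.get(IngestionStatsAndErrorsTaskReport.REPORT_KEY);
IngestionStatsAndErrorsTaskReportData payload =
    (IngestionStatsAndErrorsTaskReportData) roundTripped.getPayload();
Assert.assertEquals(IngestionState.BUILD_SEGMENTS, payload.getIngestionState());
Assert.assertEquals(1000L, payload.getSegmentAvailabilityWaitTimeMs());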

Aggregations

IngestionStatsAndErrorsTaskReport (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport): 3 usages
IngestionStatsAndErrorsTaskReportData (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData): 3 usages
TaskReport (org.apache.druid.indexing.common.TaskReport): 3 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 2 usages
IOException (java.io.IOException): 2 usages
ArrayList (java.util.ArrayList): 2 usages
List (java.util.List): 2 usages
Map (java.util.Map): 2 usages
FluentIterable (com.google.common.collect.FluentIterable): 1 usage
Inject (com.google.inject.Inject): 1 usage
InputStream (java.io.InputStream): 1 usage
StandardCharsets (java.nio.charset.StandardCharsets): 1 usage
HashMap (java.util.HashMap): 1 usage
TreeMap (java.util.TreeMap): 1 usage
Function (java.util.function.Function): 1 usage
IOUtils (org.apache.commons.io.IOUtils): 1 usage
SecondaryPartitionType (org.apache.druid.indexer.partitions.SecondaryPartitionType): 1 usage
MaxAllowedLocksExceededException (org.apache.druid.indexing.common.task.batch.MaxAllowedLocksExceededException): 1 usage
PartialDimensionCardinalityTask (org.apache.druid.indexing.common.task.batch.parallel.PartialDimensionCardinalityTask): 1 usage
PartialDimensionDistributionTask (org.apache.druid.indexing.common.task.batch.parallel.PartialDimensionDistributionTask): 1 usage