Example 1 with RowIngestionMetersTotals

Use of org.apache.druid.segment.incremental.RowIngestionMetersTotals in project druid by druid-io.

From class TaskRealtimeMetricsMonitor, method doMonitor:

@Override
public boolean doMonitor(ServiceEmitter emitter) {
    FireDepartmentMetrics metrics = fireDepartment.getMetrics().snapshot();
    RowIngestionMetersTotals rowIngestionMetersTotals = rowIngestionMeters.getTotals();
    final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, fireDepartment.getDataSchema().getDataSource());
    MonitorUtils.addDimensionsToBuilder(builder, dimensions);
    final long thrownAway = rowIngestionMetersTotals.getThrownAway() - previousRowIngestionMetersTotals.getThrownAway();
    if (thrownAway > 0) {
        log.warn("[%,d] events thrown away. Possible causes: null events, events filtered out by transformSpec, or events outside earlyMessageRejectionPeriod / lateMessageRejectionPeriod.", thrownAway);
    }
    emitter.emit(builder.build("ingest/events/thrownAway", thrownAway));
    final long unparseable = rowIngestionMetersTotals.getUnparseable() - previousRowIngestionMetersTotals.getUnparseable();
    if (unparseable > 0) {
        log.error("[%,d] unparseable events discarded. Turn on debug logging to see exception stack trace.", unparseable);
    }
    emitter.emit(builder.build("ingest/events/unparseable", unparseable));
    final long processedWithError = rowIngestionMetersTotals.getProcessedWithError() - previousRowIngestionMetersTotals.getProcessedWithError();
    if (processedWithError > 0) {
        log.error("[%,d] events processed with errors! Set logParseExceptions to true in the ingestion spec to log these errors.", processedWithError);
    }
    emitter.emit(builder.build("ingest/events/processedWithError", processedWithError));
    emitter.emit(builder.build("ingest/events/processed", rowIngestionMetersTotals.getProcessed() - previousRowIngestionMetersTotals.getProcessed()));
    final long dedup = metrics.dedup() - previousFireDepartmentMetrics.dedup();
    if (dedup > 0) {
        log.warn("[%,d] duplicate events!", dedup);
    }
    emitter.emit(builder.build("ingest/events/duplicate", dedup));
    emitter.emit(builder.build("ingest/rows/output", metrics.rowOutput() - previousFireDepartmentMetrics.rowOutput()));
    emitter.emit(builder.build("ingest/persists/count", metrics.numPersists() - previousFireDepartmentMetrics.numPersists()));
    emitter.emit(builder.build("ingest/persists/time", metrics.persistTimeMillis() - previousFireDepartmentMetrics.persistTimeMillis()));
    emitter.emit(builder.build("ingest/persists/cpu", metrics.persistCpuTime() - previousFireDepartmentMetrics.persistCpuTime()));
    emitter.emit(builder.build("ingest/persists/backPressure", metrics.persistBackPressureMillis() - previousFireDepartmentMetrics.persistBackPressureMillis()));
    emitter.emit(builder.build("ingest/persists/failed", metrics.failedPersists() - previousFireDepartmentMetrics.failedPersists()));
    emitter.emit(builder.build("ingest/handoff/failed", metrics.failedHandoffs() - previousFireDepartmentMetrics.failedHandoffs()));
    emitter.emit(builder.build("ingest/merge/time", metrics.mergeTimeMillis() - previousFireDepartmentMetrics.mergeTimeMillis()));
    emitter.emit(builder.build("ingest/merge/cpu", metrics.mergeCpuTime() - previousFireDepartmentMetrics.mergeCpuTime()));
    emitter.emit(builder.build("ingest/handoff/count", metrics.handOffCount() - previousFireDepartmentMetrics.handOffCount()));
    emitter.emit(builder.build("ingest/sink/count", metrics.sinkCount()));
    emitter.emit(builder.build("ingest/events/messageGap", metrics.messageGap()));
    previousRowIngestionMetersTotals = rowIngestionMetersTotals;
    previousFireDepartmentMetrics = metrics;
    return true;
}
Also used: FireDepartmentMetrics (org.apache.druid.segment.realtime.FireDepartmentMetrics), ServiceMetricEvent (org.apache.druid.java.util.emitter.service.ServiceMetricEvent), RowIngestionMetersTotals (org.apache.druid.segment.incremental.RowIngestionMetersTotals)
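
The monitor reports per-interval deltas rather than cumulative counts: each event metric is the difference between the current totals snapshot and the snapshot kept from the previous doMonitor call. Below is a minimal sketch of that delta pattern, using only the getters visible above; the helper class and method are illustrative, not part of Druid, and the constructor argument order (processed, processedWithError, thrownAway, unparseable) is assumed from the way the counters are passed in Example 5.

import org.apache.druid.segment.incremental.RowIngestionMetersTotals;

// Hypothetical helper mirroring the subtraction pattern in doMonitor() above:
// given two cumulative snapshots, return the counts accumulated in between.
public final class RowStatsDelta
{
    private RowStatsDelta() {}

    public static RowIngestionMetersTotals delta(
        RowIngestionMetersTotals current,
        RowIngestionMetersTotals previous
    )
    {
        // Assumed constructor order: processed, processedWithError, thrownAway, unparseable.
        return new RowIngestionMetersTotals(
            current.getProcessed() - previous.getProcessed(),
            current.getProcessedWithError() - previous.getProcessedWithError(),
            current.getThrownAway() - previous.getThrownAway(),
            current.getUnparseable() - previous.getUnparseable()
        );
    }
}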

Example 2 with RowIngestionMetersTotals

Use of org.apache.druid.segment.incremental.RowIngestionMetersTotals in project druid by druid-io.

From class SinglePhaseParallelIndexingTest, method testRunInSequential:

@Test
public void testRunInSequential() {
    final Interval interval = Intervals.of("2017-12/P1M");
    final boolean appendToExisting = false;
    final ParallelIndexSupervisorTask task = newTask(interval, appendToExisting, false);
    task.addToContext(Tasks.FORCE_TIME_CHUNK_LOCK_KEY, lockGranularity == LockGranularity.TIME_CHUNK);
    Assert.assertEquals(TaskState.SUCCESS, getIndexingServiceClient().runAndWait(task).getStatusCode());
    assertShardSpec(task, lockGranularity, appendToExisting, Collections.emptyList());
    TaskContainer taskContainer = getIndexingServiceClient().getTaskContainer(task.getId());
    final ParallelIndexSupervisorTask executedTask = (ParallelIndexSupervisorTask) taskContainer.getTask();
    Map<String, Object> actualReports = executedTask.doGetLiveReports("full");
    RowIngestionMetersTotals expectedTotals = new RowIngestionMetersTotals(10, 1, 1, 1);
    List<ParseExceptionReport> expectedUnparseableEvents = ImmutableList.of(new ParseExceptionReport("{ts=2017unparseable}", "unparseable", ImmutableList.of(getErrorMessageForUnparseableTimestamp()), 1L), new ParseExceptionReport("{ts=2017-12-25, dim=0 th test file, val=badval}", "processedWithError", ImmutableList.of("Unable to parse value[badval] for field[val]"), 1L));
    Map<String, Object> expectedReports;
    if (useInputFormatApi) {
        expectedReports = getExpectedTaskReportSequential(task.getId(), expectedUnparseableEvents, expectedTotals);
    } else {
        // when useInputFormatApi is false, maxConcurrentSubTasks=2 and it uses the single phase runner
        // instead of sequential runner
        expectedReports = getExpectedTaskReportParallel(task.getId(), expectedUnparseableEvents, expectedTotals);
    }
    compareTaskReports(expectedReports, actualReports);
    System.out.println(actualReports);
}
Also used: ParseExceptionReport (org.apache.druid.segment.incremental.ParseExceptionReport), RowIngestionMetersTotals (org.apache.druid.segment.incremental.RowIngestionMetersTotals), Interval (org.joda.time.Interval), Test (org.junit.Test)
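
The expected totals new RowIngestionMetersTotals(10, 1, 1, 1) encode 10 rows processed, 1 processed with an error, 1 thrown away, and 1 unparseable; that argument order is inferred from Example 5, where the summed counters are passed in the same sequence. A hedged sketch of the same expectation as field-level assertions, useful when a test only cares about one counter (the test class name is illustrative):

import org.apache.druid.segment.incremental.RowIngestionMetersTotals;
import org.junit.Assert;
import org.junit.Test;

public class RowIngestionMetersTotalsFieldsTest
{
    @Test
    public void testExpectedTotalsFields()
    {
        // Assumed argument order: processed, processedWithError, thrownAway, unparseable.
        RowIngestionMetersTotals totals = new RowIngestionMetersTotals(10, 1, 1, 1);
        Assert.assertEquals(10L, totals.getProcessed());
        Assert.assertEquals(1L, totals.getProcessedWithError());
        Assert.assertEquals(1L, totals.getThrownAway());
        Assert.assertEquals(1L, totals.getUnparseable());
    }
}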

Example 3 with RowIngestionMetersTotals

Use of org.apache.druid.segment.incremental.RowIngestionMetersTotals in project druid by druid-io.

From class SinglePhaseParallelIndexingTest, method testRunInParallelTaskReports:

@Test
public void testRunInParallelTaskReports() {
    ParallelIndexSupervisorTask task = runTestTask(Intervals.of("2017-12/P1M"), Granularities.DAY, false, Collections.emptyList());
    Map<String, Object> actualReports = task.doGetLiveReports("full");
    Map<String, Object> expectedReports = getExpectedTaskReportParallel(task.getId(), ImmutableList.of(new ParseExceptionReport("{ts=2017unparseable}", "unparseable", ImmutableList.of(getErrorMessageForUnparseableTimestamp()), 1L), new ParseExceptionReport("{ts=2017-12-25, dim=0 th test file, val=badval}", "processedWithError", ImmutableList.of("Unable to parse value[badval] for field[val]"), 1L)), new RowIngestionMetersTotals(10, 1, 1, 1));
    compareTaskReports(expectedReports, actualReports);
}
Also used: ParseExceptionReport (org.apache.druid.segment.incremental.ParseExceptionReport), RowIngestionMetersTotals (org.apache.druid.segment.incremental.RowIngestionMetersTotals), Test (org.junit.Test)
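
Examples 2 and 3 build the same pair of ParseExceptionReport expectations: one row with an unparseable timestamp and one row processed with an error. If that duplication were factored out, a shared helper could look like the sketch below; the class and method names are hypothetical, and the ParseExceptionReport argument meaning (input row, error category, error messages, occurrence count) is inferred from the usages above. The timestamp error message is taken as a parameter because, in the tests, it comes from getErrorMessageForUnparseableTimestamp() on the test class.

import com.google.common.collect.ImmutableList;
import java.util.List;
import org.apache.druid.segment.incremental.ParseExceptionReport;

// Hypothetical helper, not part of Druid: builds the expectation list shared by the two tests above.
public final class ExpectedParseExceptions
{
    private ExpectedParseExceptions() {}

    public static List<ParseExceptionReport> of(String unparseableTimestampMessage)
    {
        return ImmutableList.of(
            new ParseExceptionReport(
                "{ts=2017unparseable}",
                "unparseable",
                ImmutableList.of(unparseableTimestampMessage),
                1L
            ),
            new ParseExceptionReport(
                "{ts=2017-12-25, dim=0 th test file, val=badval}",
                "processedWithError",
                ImmutableList.of("Unable to parse value[badval] for field[val]"),
                1L
            )
        );
    }
}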

Example 4 with RowIngestionMetersTotals

Use of org.apache.druid.segment.incremental.RowIngestionMetersTotals in project druid by druid-io.

From class AbstractStreamIndexingTest, method testIndexWithStreamReshardHelper:

private void testIndexWithStreamReshardHelper(@Nullable Boolean transactionEnabled, int newShardCount) throws Exception {
    final GeneratedTestConfig generatedTestConfig = new GeneratedTestConfig(INPUT_FORMAT, getResourceAsString(JSON_INPUT_FORMAT_PATH));
    try (final Closeable closer = createResourceCloser(generatedTestConfig);
        final StreamEventWriter streamEventWriter = createStreamEventWriter(config, transactionEnabled)) {
        final String taskSpec = generatedTestConfig.getStreamIngestionPropsTransform().apply(getResourceAsString(SUPERVISOR_SPEC_TEMPLATE_PATH));
        LOG.info("supervisorSpec: [%s]\n", taskSpec);
        // Start supervisor
        generatedTestConfig.setSupervisorId(indexer.submitSupervisor(taskSpec));
        LOG.info("Submitted supervisor");
        // Start generating one third of the data (before resharding)
        int secondsToGenerateRemaining = TOTAL_NUMBER_OF_SECOND;
        int secondsToGenerateFirstRound = TOTAL_NUMBER_OF_SECOND / 3;
        secondsToGenerateRemaining = secondsToGenerateRemaining - secondsToGenerateFirstRound;
        final StreamGenerator streamGenerator = new WikipediaStreamEventStreamGenerator(new JsonEventSerializer(jsonMapper), EVENTS_PER_SECOND, CYCLE_PADDING_MS);
        long numWritten = streamGenerator.run(generatedTestConfig.getStreamName(), streamEventWriter, secondsToGenerateFirstRound, FIRST_EVENT_TIME);
        // Verify supervisor is healthy before resharding
        ITRetryUtil.retryUntil(() -> SupervisorStateManager.BasicState.RUNNING.equals(indexer.getSupervisorStatus(generatedTestConfig.getSupervisorId())), true, 10000, 30, "Waiting for supervisor to be healthy");
        // Reshard the stream by splitting from STREAM_SHARD_COUNT into newShardCount shards and wait until the resharding starts
        streamAdminClient.updatePartitionCount(generatedTestConfig.getStreamName(), newShardCount, true);
        // Start generating one third of the data (while resharding)
        int secondsToGenerateSecondRound = TOTAL_NUMBER_OF_SECOND / 3;
        secondsToGenerateRemaining = secondsToGenerateRemaining - secondsToGenerateSecondRound;
        numWritten += streamGenerator.run(generatedTestConfig.getStreamName(), streamEventWriter, secondsToGenerateSecondRound, FIRST_EVENT_TIME.plusSeconds(secondsToGenerateFirstRound));
        // Wait for stream to finish resharding
        ITRetryUtil.retryUntil(() -> streamAdminClient.isStreamActive(generatedTestConfig.getStreamName()), true, 10000, 30, "Waiting for stream to finish resharding");
        ITRetryUtil.retryUntil(() -> streamAdminClient.verfiyPartitionCountUpdated(generatedTestConfig.getStreamName(), STREAM_SHARD_COUNT, newShardCount), true, 10000, 30, "Waiting for stream to finish resharding");
        // Start generating remaining data (after resharding)
        numWritten += streamGenerator.run(generatedTestConfig.getStreamName(), streamEventWriter, secondsToGenerateRemaining, FIRST_EVENT_TIME.plusSeconds(secondsToGenerateFirstRound + secondsToGenerateSecondRound));
        // Verify supervisor is healthy after resharding
        ITRetryUtil.retryUntil(() -> SupervisorStateManager.BasicState.RUNNING.equals(indexer.getSupervisorStatus(generatedTestConfig.getSupervisorId())), true, 10000, 30, "Waiting for supervisor to be healthy");
        // Verify that supervisor can catch up with the stream
        verifyIngestedData(generatedTestConfig, numWritten);
    }
    // Verify that event thrown away count was not incremented by the reshard
    List<TaskResponseObject> completedTasks = indexer.getCompleteTasksForDataSource(generatedTestConfig.getFullDatasourceName());
    for (TaskResponseObject task : completedTasks) {
        try {
            RowIngestionMetersTotals stats = indexer.getTaskStats(task.getId());
            Assert.assertEquals(0L, stats.getThrownAway());
        } catch (Exception e) {
            // Failed task may not have a task stats report. We can ignore it as the task did not consume any data
            if (!task.getStatus().isFailure()) {
                throw e;
            }
        }
    }
}
Also used: Closeable (java.io.Closeable), StreamEventWriter (org.apache.druid.testing.utils.StreamEventWriter), IOException (java.io.IOException), JsonEventSerializer (org.apache.druid.testing.utils.JsonEventSerializer), TaskResponseObject (org.apache.druid.testing.clients.TaskResponseObject), WikipediaStreamEventStreamGenerator (org.apache.druid.testing.utils.WikipediaStreamEventStreamGenerator), StreamGenerator (org.apache.druid.testing.utils.StreamGenerator), RowIngestionMetersTotals (org.apache.druid.segment.incremental.RowIngestionMetersTotals)
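
The closing loop is the verification of interest here: after resharding, every successfully completed task should report zero thrown-away rows, while failed tasks are allowed to have no stats report at all. A hedged sketch of that check factored into its own method, meant to live in the same test class (it relies on the class's indexer client; the method name is illustrative):

// Hypothetical extraction of the verification loop above; assumes the surrounding
// test class and its imports (TaskResponseObject, RowIngestionMetersTotals, Assert).
private void assertNoEventsThrownAway(List<TaskResponseObject> completedTasks) throws Exception
{
    for (TaskResponseObject task : completedTasks) {
        try {
            RowIngestionMetersTotals stats = indexer.getTaskStats(task.getId());
            Assert.assertEquals(0L, stats.getThrownAway());
        } catch (Exception e) {
            // A failed task may legitimately lack a stats report; only rethrow when the
            // task succeeded, because then the stats should have been available.
            if (!task.getStatus().isFailure()) {
                throw e;
            }
        }
    }
}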

Example 5 with RowIngestionMetersTotals

Use of org.apache.druid.segment.incremental.RowIngestionMetersTotals in project druid by druid-io.

From class ParallelIndexSupervisorTask, method doGetRowStatsAndUnparseableEventsParallelSinglePhase:

private Pair<Map<String, Object>, Map<String, Object>> doGetRowStatsAndUnparseableEventsParallelSinglePhase(SinglePhaseParallelIndexTaskRunner parallelSinglePhaseRunner, boolean includeUnparseable) {
    long processed = 0L;
    long processedWithError = 0L;
    long thrownAway = 0L;
    long unparseable = 0L;
    List<ParseExceptionReport> unparseableEvents = new ArrayList<>();
    // Get stats from completed tasks
    Map<String, PushedSegmentsReport> completedSubtaskReports = parallelSinglePhaseRunner.getReports();
    for (PushedSegmentsReport pushedSegmentsReport : completedSubtaskReports.values()) {
        Map<String, TaskReport> taskReport = pushedSegmentsReport.getTaskReport();
        if (taskReport == null || taskReport.isEmpty()) {
            LOG.warn("Got an empty task report from subtask: " + pushedSegmentsReport.getTaskId());
            continue;
        }
        IngestionStatsAndErrorsTaskReport ingestionStatsAndErrorsReport = (IngestionStatsAndErrorsTaskReport) taskReport.get(IngestionStatsAndErrorsTaskReport.REPORT_KEY);
        IngestionStatsAndErrorsTaskReportData reportData = (IngestionStatsAndErrorsTaskReportData) ingestionStatsAndErrorsReport.getPayload();
        RowIngestionMetersTotals totals = getTotalsFromBuildSegmentsRowStats(reportData.getRowStats().get(RowIngestionMeters.BUILD_SEGMENTS));
        if (includeUnparseable) {
            List<ParseExceptionReport> taskUnparseableEvents = (List<ParseExceptionReport>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);
            unparseableEvents.addAll(taskUnparseableEvents);
        }
        processed += totals.getProcessed();
        processedWithError += totals.getProcessedWithError();
        thrownAway += totals.getThrownAway();
        unparseable += totals.getUnparseable();
    }
    // Get stats from running tasks
    Set<String> runningTaskIds = parallelSinglePhaseRunner.getRunningTaskIds();
    for (String runningTaskId : runningTaskIds) {
        try {
            Map<String, Object> report = toolbox.getIndexingServiceClient().getTaskReport(runningTaskId);
            if (report == null || report.isEmpty()) {
                // task does not have a running report yet
                continue;
            }
            Map<String, Object> ingestionStatsAndErrors = (Map<String, Object>) report.get("ingestionStatsAndErrors");
            Map<String, Object> payload = (Map<String, Object>) ingestionStatsAndErrors.get("payload");
            Map<String, Object> rowStats = (Map<String, Object>) payload.get("rowStats");
            Map<String, Object> totals = (Map<String, Object>) rowStats.get("totals");
            Map<String, Object> buildSegments = (Map<String, Object>) totals.get(RowIngestionMeters.BUILD_SEGMENTS);
            if (includeUnparseable) {
                Map<String, Object> taskUnparseableEvents = (Map<String, Object>) payload.get("unparseableEvents");
                List<ParseExceptionReport> buildSegmentsUnparseableEvents = (List<ParseExceptionReport>) taskUnparseableEvents.get(RowIngestionMeters.BUILD_SEGMENTS);
                unparseableEvents.addAll(buildSegmentsUnparseableEvents);
            }
            processed += ((Number) buildSegments.get("processed")).longValue();
            processedWithError += ((Number) buildSegments.get("processedWithError")).longValue();
            thrownAway += ((Number) buildSegments.get("thrownAway")).longValue();
            unparseable += ((Number) buildSegments.get("unparseable")).longValue();
        } catch (Exception e) {
            LOG.warn(e, "Encountered exception when getting live subtask report for task: " + runningTaskId);
        }
    }
    Map<String, Object> rowStatsMap = new HashMap<>();
    Map<String, Object> totalsMap = new HashMap<>();
    totalsMap.put(RowIngestionMeters.BUILD_SEGMENTS, new RowIngestionMetersTotals(processed, processedWithError, thrownAway, unparseable));
    rowStatsMap.put("totals", totalsMap);
    return Pair.of(rowStatsMap, ImmutableMap.of(RowIngestionMeters.BUILD_SEGMENTS, unparseableEvents));
}
Also used: IngestionStatsAndErrorsTaskReport (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport), TaskReport (org.apache.druid.indexing.common.TaskReport), IngestionStatsAndErrorsTaskReportData (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData), HashMap (java.util.HashMap), ParseExceptionReport (org.apache.druid.segment.incremental.ParseExceptionReport), ArrayList (java.util.ArrayList), IOException (java.io.IOException), MaxAllowedLocksExceededException (org.apache.druid.indexing.common.task.batch.MaxAllowedLocksExceededException), List (java.util.List), RowIngestionMetersTotals (org.apache.druid.segment.incremental.RowIngestionMetersTotals), Map (java.util.Map), TreeMap (java.util.TreeMap), ImmutableMap (com.google.common.collect.ImmutableMap)
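
For running subtasks the method cannot use typed report objects, so it walks the raw deserialized report map through the keys ingestionStatsAndErrors, payload, rowStats, totals, and RowIngestionMeters.BUILD_SEGMENTS. Below is a hedged sketch of that walk factored into a helper that returns a typed RowIngestionMetersTotals; the helper class is illustrative, and the key names are exactly those cast through in the method above.

import java.util.Map;
import org.apache.druid.segment.incremental.RowIngestionMeters;
import org.apache.druid.segment.incremental.RowIngestionMetersTotals;

// Hypothetical helper: extract the BUILD_SEGMENTS counters from a live task report map,
// following the same nested keys as the running-task branch above.
public final class LiveReportRowStats
{
    private LiveReportRowStats() {}

    @SuppressWarnings("unchecked")
    public static RowIngestionMetersTotals buildSegmentsTotals(Map<String, Object> report)
    {
        final Map<String, Object> ingestionStatsAndErrors =
            (Map<String, Object>) report.get("ingestionStatsAndErrors");
        final Map<String, Object> payload = (Map<String, Object>) ingestionStatsAndErrors.get("payload");
        final Map<String, Object> rowStats = (Map<String, Object>) payload.get("rowStats");
        final Map<String, Object> totals = (Map<String, Object>) rowStats.get("totals");
        final Map<String, Object> buildSegments =
            (Map<String, Object>) totals.get(RowIngestionMeters.BUILD_SEGMENTS);
        return new RowIngestionMetersTotals(
            ((Number) buildSegments.get("processed")).longValue(),
            ((Number) buildSegments.get("processedWithError")).longValue(),
            ((Number) buildSegments.get("thrownAway")).longValue(),
            ((Number) buildSegments.get("unparseable")).longValue()
        );
    }
}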

Aggregations

RowIngestionMetersTotals (org.apache.druid.segment.incremental.RowIngestionMetersTotals): 7 usages
HashMap (java.util.HashMap): 3 usages
ParseExceptionReport (org.apache.druid.segment.incremental.ParseExceptionReport): 3 usages
Test (org.junit.Test): 3 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 2 usages
IOException (java.io.IOException): 2 usages
Map (java.util.Map): 2 usages
TreeMap (java.util.TreeMap): 2 usages
Closeable (java.io.Closeable): 1 usage
ArrayList (java.util.ArrayList): 1 usage
LinkedHashMap (java.util.LinkedHashMap): 1 usage
List (java.util.List): 1 usage
TaskStatus (org.apache.druid.indexer.TaskStatus): 1 usage
IngestionStatsAndErrorsTaskReport (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport): 1 usage
IngestionStatsAndErrorsTaskReportData (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData): 1 usage
TaskReport (org.apache.druid.indexing.common.TaskReport): 1 usage
IndexTaskTest (org.apache.druid.indexing.common.task.IndexTaskTest): 1 usage
MaxAllowedLocksExceededException (org.apache.druid.indexing.common.task.batch.MaxAllowedLocksExceededException): 1 usage
SeekableStreamEndSequenceNumbers (org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers): 1 usage
SeekableStreamIndexTaskRunner (org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskRunner)