Use of org.apache.druid.segment.incremental.RowIngestionMetersTotals in project druid by druid-io. From the class TaskRealtimeMetricsMonitor, method doMonitor:
@Override
public boolean doMonitor(ServiceEmitter emitter)
{
  FireDepartmentMetrics metrics = fireDepartment.getMetrics().snapshot();
  RowIngestionMetersTotals rowIngestionMetersTotals = rowIngestionMeters.getTotals();
  final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder()
      .setDimension(DruidMetrics.DATASOURCE, fireDepartment.getDataSchema().getDataSource());
  MonitorUtils.addDimensionsToBuilder(builder, dimensions);

  final long thrownAway = rowIngestionMetersTotals.getThrownAway() - previousRowIngestionMetersTotals.getThrownAway();
  if (thrownAway > 0) {
    log.warn(
        "[%,d] events thrown away. Possible causes: null events, events filtered out by transformSpec, "
        + "or events outside earlyMessageRejectionPeriod / lateMessageRejectionPeriod.",
        thrownAway
    );
  }
  emitter.emit(builder.build("ingest/events/thrownAway", thrownAway));

  final long unparseable = rowIngestionMetersTotals.getUnparseable() - previousRowIngestionMetersTotals.getUnparseable();
  if (unparseable > 0) {
    log.error("[%,d] unparseable events discarded. Turn on debug logging to see exception stack trace.", unparseable);
  }
  emitter.emit(builder.build("ingest/events/unparseable", unparseable));

  final long processedWithError =
      rowIngestionMetersTotals.getProcessedWithError() - previousRowIngestionMetersTotals.getProcessedWithError();
  if (processedWithError > 0) {
    log.error("[%,d] events processed with errors! Set logParseExceptions to true in the ingestion spec to log these errors.", processedWithError);
  }
  emitter.emit(builder.build("ingest/events/processedWithError", processedWithError));
  emitter.emit(builder.build("ingest/events/processed", rowIngestionMetersTotals.getProcessed() - previousRowIngestionMetersTotals.getProcessed()));

  final long dedup = metrics.dedup() - previousFireDepartmentMetrics.dedup();
  if (dedup > 0) {
    log.warn("[%,d] duplicate events!", dedup);
  }
  emitter.emit(builder.build("ingest/events/duplicate", dedup));

  emitter.emit(builder.build("ingest/rows/output", metrics.rowOutput() - previousFireDepartmentMetrics.rowOutput()));
  emitter.emit(builder.build("ingest/persists/count", metrics.numPersists() - previousFireDepartmentMetrics.numPersists()));
  emitter.emit(builder.build("ingest/persists/time", metrics.persistTimeMillis() - previousFireDepartmentMetrics.persistTimeMillis()));
  emitter.emit(builder.build("ingest/persists/cpu", metrics.persistCpuTime() - previousFireDepartmentMetrics.persistCpuTime()));
  emitter.emit(builder.build("ingest/persists/backPressure", metrics.persistBackPressureMillis() - previousFireDepartmentMetrics.persistBackPressureMillis()));
  emitter.emit(builder.build("ingest/persists/failed", metrics.failedPersists() - previousFireDepartmentMetrics.failedPersists()));
  emitter.emit(builder.build("ingest/handoff/failed", metrics.failedHandoffs() - previousFireDepartmentMetrics.failedHandoffs()));
  emitter.emit(builder.build("ingest/merge/time", metrics.mergeTimeMillis() - previousFireDepartmentMetrics.mergeTimeMillis()));
  emitter.emit(builder.build("ingest/merge/cpu", metrics.mergeCpuTime() - previousFireDepartmentMetrics.mergeCpuTime()));
  emitter.emit(builder.build("ingest/handoff/count", metrics.handOffCount() - previousFireDepartmentMetrics.handOffCount()));
  emitter.emit(builder.build("ingest/sink/count", metrics.sinkCount()));
  emitter.emit(builder.build("ingest/events/messageGap", metrics.messageGap()));

  previousRowIngestionMetersTotals = rowIngestionMetersTotals;
  previousFireDepartmentMetrics = metrics;
  return true;
}
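The pattern worth noting here is that the meters are cumulative, so each monitoring pass emits the delta against the totals saved on the previous pass. Below is a minimal self-contained sketch of that pattern; Totals is a hypothetical stand-in for RowIngestionMetersTotals, not the real Druid class.

// Self-contained sketch of the emit-delta-and-advance-baseline pattern used by
// doMonitor above. Totals is a hypothetical stand-in for RowIngestionMetersTotals.
public class DeltaMonitor
{
  record Totals(long processed, long unparseable, long thrownAway) {}

  private Totals previous = new Totals(0, 0, 0);

  // Computes the per-interval deltas against the last snapshot, then advances
  // the baseline, mirroring the assignment to previousRowIngestionMetersTotals.
  public Totals pollDeltas(Totals current)
  {
    Totals delta = new Totals(
        current.processed() - previous.processed(),
        current.unparseable() - previous.unparseable(),
        current.thrownAway() - previous.thrownAway()
    );
    previous = current;
    return delta;
  }
}

Because the baseline only advances after the deltas are computed, an interval with no new events emits zeros rather than repeating old counts.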
Use of org.apache.druid.segment.incremental.RowIngestionMetersTotals in project druid by druid-io. From the class SinglePhaseParallelIndexingTest, method testRunInSequential:
@Test
public void testRunInSequential()
{
  final Interval interval = Intervals.of("2017-12/P1M");
  final boolean appendToExisting = false;
  final ParallelIndexSupervisorTask task = newTask(interval, appendToExisting, false);
  task.addToContext(Tasks.FORCE_TIME_CHUNK_LOCK_KEY, lockGranularity == LockGranularity.TIME_CHUNK);
  Assert.assertEquals(TaskState.SUCCESS, getIndexingServiceClient().runAndWait(task).getStatusCode());
  assertShardSpec(task, lockGranularity, appendToExisting, Collections.emptyList());

  TaskContainer taskContainer = getIndexingServiceClient().getTaskContainer(task.getId());
  final ParallelIndexSupervisorTask executedTask = (ParallelIndexSupervisorTask) taskContainer.getTask();
  Map<String, Object> actualReports = executedTask.doGetLiveReports("full");

  RowIngestionMetersTotals expectedTotals = new RowIngestionMetersTotals(10, 1, 1, 1);
  List<ParseExceptionReport> expectedUnparseableEvents = ImmutableList.of(
      new ParseExceptionReport("{ts=2017unparseable}", "unparseable", ImmutableList.of(getErrorMessageForUnparseableTimestamp()), 1L),
      new ParseExceptionReport("{ts=2017-12-25, dim=0 th test file, val=badval}", "processedWithError", ImmutableList.of("Unable to parse value[badval] for field[val]"), 1L)
  );

  Map<String, Object> expectedReports;
  if (useInputFormatApi) {
    expectedReports = getExpectedTaskReportSequential(task.getId(), expectedUnparseableEvents, expectedTotals);
  } else {
    // when useInputFormatApi is false, maxConcurrentSubTasks=2 and it uses the single-phase runner
    // instead of the sequential runner
    expectedReports = getExpectedTaskReportParallel(task.getId(), expectedUnparseableEvents, expectedTotals);
  }
  compareTaskReports(expectedReports, actualReports);
  System.out.println(actualReports);
}
Use of org.apache.druid.segment.incremental.RowIngestionMetersTotals in project druid by druid-io. From the class SinglePhaseParallelIndexingTest, method testRunInParallelTaskReports:
@Test
public void testRunInParallelTaskReports()
{
  ParallelIndexSupervisorTask task = runTestTask(Intervals.of("2017-12/P1M"), Granularities.DAY, false, Collections.emptyList());
  Map<String, Object> actualReports = task.doGetLiveReports("full");
  Map<String, Object> expectedReports = getExpectedTaskReportParallel(
      task.getId(),
      ImmutableList.of(
          new ParseExceptionReport("{ts=2017unparseable}", "unparseable", ImmutableList.of(getErrorMessageForUnparseableTimestamp()), 1L),
          new ParseExceptionReport("{ts=2017-12-25, dim=0 th test file, val=badval}", "processedWithError", ImmutableList.of("Unable to parse value[badval] for field[val]"), 1L)
      ),
      new RowIngestionMetersTotals(10, 1, 1, 1)
  );
  compareTaskReports(expectedReports, actualReports);
}
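Both tests above fetch doGetLiveReports("full") and compare it against an expected structure. The nesting that comparison walks can be read off doGetRowStatsAndUnparseableEventsParallelSinglePhase further down this page (payload, then rowStats, then totals, then the buildSegments counters). The sketch below rebuilds that shape with plain maps; it assumes RowIngestionMeters.BUILD_SEGMENTS resolves to the literal "buildSegments" and is an illustration, not Druid's report builder.

// Sketch of the nested row-stats layout the tests compare against. Keys mirror
// the string literals used in doGetRowStatsAndUnparseableEventsParallelSinglePhase
// below; the flat counter map is a stand-in for a serialized RowIngestionMetersTotals.
class LiveReportShape
{
  static Map<String, Object> buildSegmentsRowStats(long processed, long processedWithError, long thrownAway, long unparseable)
  {
    Map<String, Object> counters = Map.of(
        "processed", processed,
        "processedWithError", processedWithError,
        "thrownAway", thrownAway,
        "unparseable", unparseable
    );
    // rowStats -> totals -> buildSegments -> individual counters
    return Map.of("rowStats", Map.of("totals", Map.of("buildSegments", counters)));
  }
}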
Use of org.apache.druid.segment.incremental.RowIngestionMetersTotals in project druid by druid-io. From the class AbstractStreamIndexingTest, method testIndexWithStreamReshardHelper:
private void testIndexWithStreamReshardHelper(@Nullable Boolean transactionEnabled, int newShardCount) throws Exception
{
  final GeneratedTestConfig generatedTestConfig = new GeneratedTestConfig(INPUT_FORMAT, getResourceAsString(JSON_INPUT_FORMAT_PATH));
  try (
      final Closeable closer = createResourceCloser(generatedTestConfig);
      final StreamEventWriter streamEventWriter = createStreamEventWriter(config, transactionEnabled)
  ) {
    final String taskSpec = generatedTestConfig.getStreamIngestionPropsTransform().apply(getResourceAsString(SUPERVISOR_SPEC_TEMPLATE_PATH));
    LOG.info("supervisorSpec: [%s]\n", taskSpec);

    // Start the supervisor
    generatedTestConfig.setSupervisorId(indexer.submitSupervisor(taskSpec));
    LOG.info("Submitted supervisor");

    // Generate the first third of the data (before resharding)
    int secondsToGenerateRemaining = TOTAL_NUMBER_OF_SECOND;
    int secondsToGenerateFirstRound = TOTAL_NUMBER_OF_SECOND / 3;
    secondsToGenerateRemaining = secondsToGenerateRemaining - secondsToGenerateFirstRound;
    final StreamGenerator streamGenerator = new WikipediaStreamEventStreamGenerator(new JsonEventSerializer(jsonMapper), EVENTS_PER_SECOND, CYCLE_PADDING_MS);
    long numWritten = streamGenerator.run(generatedTestConfig.getStreamName(), streamEventWriter, secondsToGenerateFirstRound, FIRST_EVENT_TIME);

    // Verify that the supervisor is healthy before resharding
    ITRetryUtil.retryUntil(
        () -> SupervisorStateManager.BasicState.RUNNING.equals(indexer.getSupervisorStatus(generatedTestConfig.getSupervisorId())),
        true,
        10000,
        30,
        "Waiting for supervisor to be healthy"
    );

    // Reshard the stream by splitting from STREAM_SHARD_COUNT to newShardCount, and wait until the resharding starts
    streamAdminClient.updatePartitionCount(generatedTestConfig.getStreamName(), newShardCount, true);

    // Generate the second third of the data (while resharding)
    int secondsToGenerateSecondRound = TOTAL_NUMBER_OF_SECOND / 3;
    secondsToGenerateRemaining = secondsToGenerateRemaining - secondsToGenerateSecondRound;
    numWritten += streamGenerator.run(generatedTestConfig.getStreamName(), streamEventWriter, secondsToGenerateSecondRound, FIRST_EVENT_TIME.plusSeconds(secondsToGenerateFirstRound));

    // Wait for the stream to finish resharding
    ITRetryUtil.retryUntil(
        () -> streamAdminClient.isStreamActive(generatedTestConfig.getStreamName()),
        true,
        10000,
        30,
        "Waiting for stream to finish resharding"
    );
    ITRetryUtil.retryUntil(
        () -> streamAdminClient.verfiyPartitionCountUpdated(generatedTestConfig.getStreamName(), STREAM_SHARD_COUNT, newShardCount),
        true,
        10000,
        30,
        "Waiting for stream to finish resharding"
    );

    // Generate the remaining data (after resharding)
    numWritten += streamGenerator.run(generatedTestConfig.getStreamName(), streamEventWriter, secondsToGenerateRemaining, FIRST_EVENT_TIME.plusSeconds(secondsToGenerateFirstRound + secondsToGenerateSecondRound));

    // Verify that the supervisor is healthy after resharding
    ITRetryUtil.retryUntil(
        () -> SupervisorStateManager.BasicState.RUNNING.equals(indexer.getSupervisorStatus(generatedTestConfig.getSupervisorId())),
        true,
        10000,
        30,
        "Waiting for supervisor to be healthy"
    );

    // Verify that the supervisor can catch up with the stream
    verifyIngestedData(generatedTestConfig, numWritten);
  }

  // Verify that the thrown-away event count was not incremented by the reshard
  List<TaskResponseObject> completedTasks = indexer.getCompleteTasksForDataSource(generatedTestConfig.getFullDatasourceName());
  for (TaskResponseObject task : completedTasks) {
    try {
      RowIngestionMetersTotals stats = indexer.getTaskStats(task.getId());
      Assert.assertEquals(0L, stats.getThrownAway());
    } catch (Exception e) {
      // A failed task may not have a task stats report; ignore it, since the task did not consume any data
      if (!task.getStatus().isFailure()) {
        throw e;
      }
    }
  }
}
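All of the orchestration above leans on ITRetryUtil.retryUntil, which polls a condition with a fixed delay (10000 ms) and attempt budget (30). The sketch below illustrates that polling contract in plain Java; the helper name and signature are assumptions for illustration, not Druid's actual ITRetryUtil API.

// Generic poll-until-true helper, sketching the contract the test relies on:
// retry the condition up to maxAttempts times, sleeping delayMillis between tries.
import java.util.function.BooleanSupplier;

final class RetryUntil
{
  static void retryUntil(BooleanSupplier condition, long delayMillis, int maxAttempts, String taskMessage)
      throws InterruptedException
  {
    for (int attempt = 1; attempt <= maxAttempts; attempt++) {
      if (condition.getAsBoolean()) {
        return; // condition satisfied, stop polling
      }
      System.out.printf("%s: attempt %d/%d not yet satisfied%n", taskMessage, attempt, maxAttempts);
      Thread.sleep(delayMillis);
    }
    throw new IllegalStateException("Condition not met after " + maxAttempts + " attempts: " + taskMessage);
  }
}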
Use of org.apache.druid.segment.incremental.RowIngestionMetersTotals in project druid by druid-io. From the class ParallelIndexSupervisorTask, method doGetRowStatsAndUnparseableEventsParallelSinglePhase:
private Pair<Map<String, Object>, Map<String, Object>> doGetRowStatsAndUnparseableEventsParallelSinglePhase(
    SinglePhaseParallelIndexTaskRunner parallelSinglePhaseRunner,
    boolean includeUnparseable
)
{
  long processed = 0L;
  long processedWithError = 0L;
  long thrownAway = 0L;
  long unparseable = 0L;
  List<ParseExceptionReport> unparseableEvents = new ArrayList<>();

  // Get stats from completed tasks
  Map<String, PushedSegmentsReport> completedSubtaskReports = parallelSinglePhaseRunner.getReports();
  for (PushedSegmentsReport pushedSegmentsReport : completedSubtaskReports.values()) {
    Map<String, TaskReport> taskReport = pushedSegmentsReport.getTaskReport();
    if (taskReport == null || taskReport.isEmpty()) {
      LOG.warn("Got an empty task report from subtask: " + pushedSegmentsReport.getTaskId());
      continue;
    }
    IngestionStatsAndErrorsTaskReport ingestionStatsAndErrorsReport =
        (IngestionStatsAndErrorsTaskReport) taskReport.get(IngestionStatsAndErrorsTaskReport.REPORT_KEY);
    IngestionStatsAndErrorsTaskReportData reportData =
        (IngestionStatsAndErrorsTaskReportData) ingestionStatsAndErrorsReport.getPayload();
    RowIngestionMetersTotals totals =
        getTotalsFromBuildSegmentsRowStats(reportData.getRowStats().get(RowIngestionMeters.BUILD_SEGMENTS));
    if (includeUnparseable) {
      List<ParseExceptionReport> taskUnparseableEvents =
          (List<ParseExceptionReport>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);
      unparseableEvents.addAll(taskUnparseableEvents);
    }
    processed += totals.getProcessed();
    processedWithError += totals.getProcessedWithError();
    thrownAway += totals.getThrownAway();
    unparseable += totals.getUnparseable();
  }

  // Get stats from running tasks
  Set<String> runningTaskIds = parallelSinglePhaseRunner.getRunningTaskIds();
  for (String runningTaskId : runningTaskIds) {
    try {
      Map<String, Object> report = toolbox.getIndexingServiceClient().getTaskReport(runningTaskId);
      if (report == null || report.isEmpty()) {
        // The task does not have a running report yet
        continue;
      }
      Map<String, Object> ingestionStatsAndErrors = (Map<String, Object>) report.get("ingestionStatsAndErrors");
      Map<String, Object> payload = (Map<String, Object>) ingestionStatsAndErrors.get("payload");
      Map<String, Object> rowStats = (Map<String, Object>) payload.get("rowStats");
      Map<String, Object> totals = (Map<String, Object>) rowStats.get("totals");
      Map<String, Object> buildSegments = (Map<String, Object>) totals.get(RowIngestionMeters.BUILD_SEGMENTS);
      if (includeUnparseable) {
        Map<String, Object> taskUnparseableEvents = (Map<String, Object>) payload.get("unparseableEvents");
        List<ParseExceptionReport> buildSegmentsUnparseableEvents =
            (List<ParseExceptionReport>) taskUnparseableEvents.get(RowIngestionMeters.BUILD_SEGMENTS);
        unparseableEvents.addAll(buildSegmentsUnparseableEvents);
      }
      processed += ((Number) buildSegments.get("processed")).longValue();
      processedWithError += ((Number) buildSegments.get("processedWithError")).longValue();
      thrownAway += ((Number) buildSegments.get("thrownAway")).longValue();
      unparseable += ((Number) buildSegments.get("unparseable")).longValue();
    } catch (Exception e) {
      LOG.warn(e, "Encountered exception when getting live subtask report for task: " + runningTaskId);
    }
  }

  Map<String, Object> rowStatsMap = new HashMap<>();
  Map<String, Object> totalsMap = new HashMap<>();
  totalsMap.put(RowIngestionMeters.BUILD_SEGMENTS, new RowIngestionMetersTotals(processed, processedWithError, thrownAway, unparseable));
  rowStatsMap.put("totals", totalsMap);
  return Pair.of(rowStatsMap, ImmutableMap.of(RowIngestionMeters.BUILD_SEGMENTS, unparseableEvents));
}
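The reduction above is a plain summation of four counters across completed and running subtask reports. Below is a compact, self-contained sketch of the same fold; SubtaskTotals is a hypothetical stand-in for RowIngestionMetersTotals, with field order matching the constructor call above (processed, processedWithError, thrownAway, unparseable).

import java.util.List;

// Hypothetical stand-in for RowIngestionMetersTotals, used to illustrate
// how per-subtask counters fold into one aggregate.
record SubtaskTotals(long processed, long processedWithError, long thrownAway, long unparseable)
{
  SubtaskTotals plus(SubtaskTotals other)
  {
    return new SubtaskTotals(
        processed + other.processed,
        processedWithError + other.processedWithError,
        thrownAway + other.thrownAway,
        unparseable + other.unparseable
    );
  }

  // Folds all subtask counters into a single aggregate, starting from zero.
  static SubtaskTotals sum(List<SubtaskTotals> perSubtask)
  {
    return perSubtask.stream().reduce(new SubtaskTotals(0, 0, 0, 0), SubtaskTotals::plus);
  }
}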