use of org.apache.druid.segment.incremental.ParseExceptionReport in project druid by druid-io.
the class SinglePhaseParallelIndexingTest method testRunInSequential.
@Test
public void testRunInSequential()
{
  final Interval interval = Intervals.of("2017-12/P1M");
  final boolean appendToExisting = false;
  final ParallelIndexSupervisorTask task = newTask(interval, appendToExisting, false);
  task.addToContext(Tasks.FORCE_TIME_CHUNK_LOCK_KEY, lockGranularity == LockGranularity.TIME_CHUNK);
  Assert.assertEquals(TaskState.SUCCESS, getIndexingServiceClient().runAndWait(task).getStatusCode());
  assertShardSpec(task, lockGranularity, appendToExisting, Collections.emptyList());

  TaskContainer taskContainer = getIndexingServiceClient().getTaskContainer(task.getId());
  final ParallelIndexSupervisorTask executedTask = (ParallelIndexSupervisorTask) taskContainer.getTask();
  Map<String, Object> actualReports = executedTask.doGetLiveReports("full");

  RowIngestionMetersTotals expectedTotals = new RowIngestionMetersTotals(10, 1, 1, 1);
  List<ParseExceptionReport> expectedUnparseableEvents = ImmutableList.of(
      new ParseExceptionReport("{ts=2017unparseable}", "unparseable", ImmutableList.of(getErrorMessageForUnparseableTimestamp()), 1L),
      new ParseExceptionReport("{ts=2017-12-25, dim=0 th test file, val=badval}", "processedWithError", ImmutableList.of("Unable to parse value[badval] for field[val]"), 1L)
  );

  Map<String, Object> expectedReports;
  if (useInputFormatApi) {
    expectedReports = getExpectedTaskReportSequential(task.getId(), expectedUnparseableEvents, expectedTotals);
  } else {
    // when useInputFormatApi is false, maxConcurrentSubTasks=2 and it uses the single phase runner
    // instead of the sequential runner
    expectedReports = getExpectedTaskReportParallel(task.getId(), expectedUnparseableEvents, expectedTotals);
  }

  compareTaskReports(expectedReports, actualReports);
  System.out.println(actualReports);
}
use of org.apache.druid.segment.incremental.ParseExceptionReport in project druid by druid-io.
the class SinglePhaseParallelIndexingTest method testRunInParallelTaskReports.
@Test
public void testRunInParallelTaskReports()
{
  ParallelIndexSupervisorTask task = runTestTask(Intervals.of("2017-12/P1M"), Granularities.DAY, false, Collections.emptyList());
  Map<String, Object> actualReports = task.doGetLiveReports("full");
  Map<String, Object> expectedReports = getExpectedTaskReportParallel(
      task.getId(),
      ImmutableList.of(
          new ParseExceptionReport("{ts=2017unparseable}", "unparseable", ImmutableList.of(getErrorMessageForUnparseableTimestamp()), 1L),
          new ParseExceptionReport("{ts=2017-12-25, dim=0 th test file, val=badval}", "processedWithError", ImmutableList.of("Unable to parse value[badval] for field[val]"), 1L)
      ),
      new RowIngestionMetersTotals(10, 1, 1, 1)
  );
  compareTaskReports(expectedReports, actualReports);
}
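Note the argument order of RowIngestionMetersTotals, which matches the aggregation code shown further below: (processed, processedWithError, thrownAway, unparseable). A short sketch of what the expected totals in these two tests encode:

// Argument order: (processed, processedWithError, thrownAway, unparseable).
// The tests above therefore expect 10 fully processed rows, 1 row processed with a
// parse error, 1 row thrown away, and 1 unparseable row.
RowIngestionMetersTotals expectedTotals = new RowIngestionMetersTotals(10, 1, 1, 1);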
use of org.apache.druid.segment.incremental.ParseExceptionReport in project druid by druid-io.
the class ParallelIndexSupervisorTask method doGetRowStatsAndUnparseableEventsParallelSinglePhase.
private Pair<Map<String, Object>, Map<String, Object>> doGetRowStatsAndUnparseableEventsParallelSinglePhase(
    SinglePhaseParallelIndexTaskRunner parallelSinglePhaseRunner,
    boolean includeUnparseable
)
{
  long processed = 0L;
  long processedWithError = 0L;
  long thrownAway = 0L;
  long unparseable = 0L;
  List<ParseExceptionReport> unparseableEvents = new ArrayList<>();

  // Get stats from completed tasks
  Map<String, PushedSegmentsReport> completedSubtaskReports = parallelSinglePhaseRunner.getReports();
  for (PushedSegmentsReport pushedSegmentsReport : completedSubtaskReports.values()) {
    Map<String, TaskReport> taskReport = pushedSegmentsReport.getTaskReport();
    if (taskReport == null || taskReport.isEmpty()) {
      LOG.warn("Got an empty task report from subtask: " + pushedSegmentsReport.getTaskId());
      continue;
    }
    IngestionStatsAndErrorsTaskReport ingestionStatsAndErrorsReport =
        (IngestionStatsAndErrorsTaskReport) taskReport.get(IngestionStatsAndErrorsTaskReport.REPORT_KEY);
    IngestionStatsAndErrorsTaskReportData reportData =
        (IngestionStatsAndErrorsTaskReportData) ingestionStatsAndErrorsReport.getPayload();
    RowIngestionMetersTotals totals =
        getTotalsFromBuildSegmentsRowStats(reportData.getRowStats().get(RowIngestionMeters.BUILD_SEGMENTS));
    if (includeUnparseable) {
      List<ParseExceptionReport> taskUnparseableEvents =
          (List<ParseExceptionReport>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);
      unparseableEvents.addAll(taskUnparseableEvents);
    }
    processed += totals.getProcessed();
    processedWithError += totals.getProcessedWithError();
    thrownAway += totals.getThrownAway();
    unparseable += totals.getUnparseable();
  }

  // Get stats from running tasks
  Set<String> runningTaskIds = parallelSinglePhaseRunner.getRunningTaskIds();
  for (String runningTaskId : runningTaskIds) {
    try {
      Map<String, Object> report = toolbox.getIndexingServiceClient().getTaskReport(runningTaskId);
      if (report == null || report.isEmpty()) {
        // task does not have a running report yet
        continue;
      }
      Map<String, Object> ingestionStatsAndErrors = (Map<String, Object>) report.get("ingestionStatsAndErrors");
      Map<String, Object> payload = (Map<String, Object>) ingestionStatsAndErrors.get("payload");
      Map<String, Object> rowStats = (Map<String, Object>) payload.get("rowStats");
      Map<String, Object> totals = (Map<String, Object>) rowStats.get("totals");
      Map<String, Object> buildSegments = (Map<String, Object>) totals.get(RowIngestionMeters.BUILD_SEGMENTS);
      if (includeUnparseable) {
        Map<String, Object> taskUnparseableEvents = (Map<String, Object>) payload.get("unparseableEvents");
        List<ParseExceptionReport> buildSegmentsUnparseableEvents =
            (List<ParseExceptionReport>) taskUnparseableEvents.get(RowIngestionMeters.BUILD_SEGMENTS);
        unparseableEvents.addAll(buildSegmentsUnparseableEvents);
      }
      processed += ((Number) buildSegments.get("processed")).longValue();
      processedWithError += ((Number) buildSegments.get("processedWithError")).longValue();
      thrownAway += ((Number) buildSegments.get("thrownAway")).longValue();
      unparseable += ((Number) buildSegments.get("unparseable")).longValue();
    }
    catch (Exception e) {
      LOG.warn(e, "Encountered exception when getting live subtask report for task: " + runningTaskId);
    }
  }

  Map<String, Object> rowStatsMap = new HashMap<>();
  Map<String, Object> totalsMap = new HashMap<>();
  totalsMap.put(RowIngestionMeters.BUILD_SEGMENTS, new RowIngestionMetersTotals(processed, processedWithError, thrownAway, unparseable));
  rowStatsMap.put("totals", totalsMap);
  return Pair.of(rowStatsMap, ImmutableMap.of(RowIngestionMeters.BUILD_SEGMENTS, unparseableEvents));
}
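The returned pair nests the merged totals under the "totals" key and keys both halves by RowIngestionMeters.BUILD_SEGMENTS. A minimal sketch of unpacking that structure from inside the same class (the local variable names and the runner reference are illustrative, not part of the Druid API):

// Illustrative only: read back the structure built by the method above.
Pair<Map<String, Object>, Map<String, Object>> rowStatsAndUnparseableEvents =
    doGetRowStatsAndUnparseableEventsParallelSinglePhase(runner, true);

Map<String, Object> totals = (Map<String, Object>) rowStatsAndUnparseableEvents.lhs.get("totals");
RowIngestionMetersTotals buildSegmentsTotals =
    (RowIngestionMetersTotals) totals.get(RowIngestionMeters.BUILD_SEGMENTS);

List<ParseExceptionReport> buildSegmentsUnparseableEvents =
    (List<ParseExceptionReport>) rowStatsAndUnparseableEvents.rhs.get(RowIngestionMeters.BUILD_SEGMENTS);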
use of org.apache.druid.segment.incremental.ParseExceptionReport in project druid by druid-io.
the class KafkaSupervisorTest method testGetCurrentParseErrors.
@Test
public void testGetCurrentParseErrors()
{
  supervisor = getTestableSupervisor(1, 2, true, "PT1H", null, null, false, kafkaHost);
  supervisor.addTaskGroupToActivelyReadingTaskGroup(
      supervisor.getTaskGroupIdForPartition(0),
      ImmutableMap.of(0, 0L),
      Optional.absent(),
      Optional.absent(),
      ImmutableSet.of("task1"),
      ImmutableSet.of()
  );
  supervisor.addTaskGroupToPendingCompletionTaskGroup(
      supervisor.getTaskGroupIdForPartition(1),
      ImmutableMap.of(0, 0L),
      Optional.absent(),
      Optional.absent(),
      ImmutableSet.of("task2"),
      ImmutableSet.of()
  );

  ParseExceptionReport exception1 = new ParseExceptionReport("testInput1", "unparseable", ImmutableList.of("detail1", "detail2"), 1000L);
  ParseExceptionReport exception2 = new ParseExceptionReport("testInput2", "unparseable", ImmutableList.of("detail1", "detail2"), 2000L);
  ParseExceptionReport exception3 = new ParseExceptionReport("testInput3", "unparseable", ImmutableList.of("detail1", "detail2"), 3000L);
  ParseExceptionReport exception4 = new ParseExceptionReport("testInput4", "unparseable", ImmutableList.of("detail1", "detail2"), 4000L);

  EasyMock.expect(taskClient.getParseErrorsAsync("task1"))
          .andReturn(Futures.immediateFuture(ImmutableList.of(exception1, exception2)))
          .times(1);
  EasyMock.expect(taskClient.getParseErrorsAsync("task2"))
          .andReturn(Futures.immediateFuture(ImmutableList.of(exception3, exception4)))
          .times(1);
  replayAll();

  List<ParseExceptionReport> errors = supervisor.getParseErrors();
  verifyAll();

  Assert.assertEquals(ImmutableList.of(exception4, exception3, exception2, exception1), errors);
}
use of org.apache.druid.segment.incremental.ParseExceptionReport in project druid by druid-io.
the class SeekableStreamSupervisor method getCurrentParseErrors.
/**
 * Collect parse errors from all tasks managed by this supervisor.
 *
 * @return A list of the most recent parse errors, as {@link ParseExceptionReport} objects
 *
 * @throws InterruptedException
 * @throws ExecutionException
 * @throws TimeoutException
 */
private List<ParseExceptionReport> getCurrentParseErrors() throws InterruptedException, ExecutionException, TimeoutException
{
  final List<ListenableFuture<ErrorsFromTaskResult>> futures = new ArrayList<>();
  final List<Pair<Integer, String>> groupAndTaskIds = new ArrayList<>();

  for (int groupId : activelyReadingTaskGroups.keySet()) {
    TaskGroup group = activelyReadingTaskGroups.get(groupId);
    for (String taskId : group.taskIds()) {
      futures.add(
          Futures.transform(
              taskClient.getParseErrorsAsync(taskId),
              (Function<List<ParseExceptionReport>, ErrorsFromTaskResult>) (taskErrors) ->
                  new ErrorsFromTaskResult(groupId, taskId, taskErrors)
          )
      );
      groupAndTaskIds.add(new Pair<>(groupId, taskId));
    }
  }

  for (int groupId : pendingCompletionTaskGroups.keySet()) {
    List<TaskGroup> pendingGroups = pendingCompletionTaskGroups.get(groupId);
    for (TaskGroup pendingGroup : pendingGroups) {
      for (String taskId : pendingGroup.taskIds()) {
        futures.add(
            Futures.transform(
                taskClient.getParseErrorsAsync(taskId),
                (Function<List<ParseExceptionReport>, ErrorsFromTaskResult>) (taskErrors) ->
                    new ErrorsFromTaskResult(groupId, taskId, taskErrors)
            )
        );
        groupAndTaskIds.add(new Pair<>(groupId, taskId));
      }
    }
  }

  // We use a tree set to sort the parse errors by time and to eliminate duplicates across calls to this method
  TreeSet<ParseExceptionReport> parseErrorsTreeSet = new TreeSet<>(PARSE_EXCEPTION_REPORT_COMPARATOR);
  parseErrorsTreeSet.addAll(lastKnownParseErrors);

  List<ErrorsFromTaskResult> results = Futures.successfulAsList(futures).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
  for (int i = 0; i < results.size(); i++) {
    ErrorsFromTaskResult result = results.get(i);
    if (result != null) {
      parseErrorsTreeSet.addAll(result.getErrors());
    } else {
      Pair<Integer, String> groupAndTaskId = groupAndTaskIds.get(i);
      log.error("Failed to get errors for group[%d]-task[%s]", groupAndTaskId.lhs, groupAndTaskId.rhs);
    }
  }

  // Store a limited number of parse exceptions, keeping the most recent ones
  SeekableStreamIndexTaskTuningConfig taskTuningConfig = spec.getSpec().getTuningConfig().convertToTaskTuningConfig();
  SeekableStreamSupervisorIOConfig ioConfig = spec.getSpec().getIOConfig();
  int parseErrorLimit = taskTuningConfig.getMaxSavedParseExceptions() * ioConfig.getTaskCount();
  parseErrorLimit = Math.min(parseErrorLimit, parseErrorsTreeSet.size());

  final List<ParseExceptionReport> limitedParseErrors = new ArrayList<>();
  Iterator<ParseExceptionReport> descendingIterator = parseErrorsTreeSet.descendingIterator();
  for (int i = 0; i < parseErrorLimit; i++) {
    limitedParseErrors.add(descendingIterator.next());
  }
  return limitedParseErrors;
}
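The PARSE_EXCEPTION_REPORT_COMPARATOR referenced above is not shown in this snippet. A plausible sketch, assuming reports expose getters for their exception time, input, and error type (those accessor names are assumptions, not confirmed here), which would make descendingIterator() yield the most recent errors first:

// Sketch only: order reports by exception time, breaking ties on input and error type
// so that distinct reports with the same timestamp are not collapsed by the TreeSet.
// The getter names below are assumed for illustration.
private static final Comparator<ParseExceptionReport> PARSE_EXCEPTION_REPORT_COMPARATOR =
    Comparator.comparingLong(ParseExceptionReport::getTimeOfExceptionMillis)
              .thenComparing(ParseExceptionReport::getInput)
              .thenComparing(ParseExceptionReport::getErrorType);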