Example 1 with ParseExceptionReport

Use of org.apache.druid.segment.incremental.ParseExceptionReport in project druid by druid-io.

From class SinglePhaseParallelIndexingTest, the method testRunInSequential:

@Test
public void testRunInSequential() {
    final Interval interval = Intervals.of("2017-12/P1M");
    final boolean appendToExisting = false;
    final ParallelIndexSupervisorTask task = newTask(interval, appendToExisting, false);
    task.addToContext(Tasks.FORCE_TIME_CHUNK_LOCK_KEY, lockGranularity == LockGranularity.TIME_CHUNK);
    Assert.assertEquals(TaskState.SUCCESS, getIndexingServiceClient().runAndWait(task).getStatusCode());
    assertShardSpec(task, lockGranularity, appendToExisting, Collections.emptyList());
    TaskContainer taskContainer = getIndexingServiceClient().getTaskContainer(task.getId());
    final ParallelIndexSupervisorTask executedTask = (ParallelIndexSupervisorTask) taskContainer.getTask();
    Map<String, Object> actualReports = executedTask.doGetLiveReports("full");
    // expected totals: processed, processedWithError, thrownAway, unparseable
    RowIngestionMetersTotals expectedTotals = new RowIngestionMetersTotals(10, 1, 1, 1);
    List<ParseExceptionReport> expectedUnparseableEvents = ImmutableList.of(
        new ParseExceptionReport("{ts=2017unparseable}", "unparseable", ImmutableList.of(getErrorMessageForUnparseableTimestamp()), 1L),
        new ParseExceptionReport("{ts=2017-12-25, dim=0 th test file, val=badval}", "processedWithError", ImmutableList.of("Unable to parse value[badval] for field[val]"), 1L)
    );
    Map<String, Object> expectedReports;
    if (useInputFormatApi) {
        expectedReports = getExpectedTaskReportSequential(task.getId(), expectedUnparseableEvents, expectedTotals);
    } else {
        // When useInputFormatApi is false, maxConcurrentSubTasks is 2, so the task uses
        // the single-phase parallel runner instead of the sequential runner.
        expectedReports = getExpectedTaskReportParallel(task.getId(), expectedUnparseableEvents, expectedTotals);
    }
    compareTaskReports(expectedReports, actualReports);
    System.out.println(actualReports);
}
Also used: ParseExceptionReport (org.apache.druid.segment.incremental.ParseExceptionReport), RowIngestionMetersTotals (org.apache.druid.segment.incremental.RowIngestionMetersTotals), Interval (org.joda.time.Interval), Test (org.junit.Test)
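
For reference, the constructor calls above suggest that ParseExceptionReport is a small value class carrying the offending input row, an error type ("unparseable" or "processedWithError"), a list of detail messages, and a timestamp. A minimal sketch of such a class, with field and accessor names assumed from the call sites rather than taken from the Druid source:

import java.util.List;

// Hypothetical reconstruction inferred from the call sites above;
// the real Druid class may differ in names, annotations, and equals/hashCode.
public class ParseExceptionReport {
    private final String input;                // the raw row that failed, e.g. "{ts=2017unparseable}"
    private final String errorType;            // "unparseable" or "processedWithError"
    private final List<String> details;        // human-readable error messages
    private final long timeOfExceptionMillis;  // assumed meaning of the trailing long argument

    public ParseExceptionReport(String input, String errorType, List<String> details, long timeOfExceptionMillis) {
        this.input = input;
        this.errorType = errorType;
        this.details = details;
        this.timeOfExceptionMillis = timeOfExceptionMillis;
    }

    public String getInput() { return input; }
    public String getErrorType() { return errorType; }
    public List<String> getDetails() { return details; }
    public long getTimeOfExceptionMillis() { return timeOfExceptionMillis; }
}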

Example 2 with ParseExceptionReport

Use of org.apache.druid.segment.incremental.ParseExceptionReport in project druid by druid-io.

From class SinglePhaseParallelIndexingTest, the method testRunInParallelTaskReports:

@Test
public void testRunInParallelTaskReports() {
    ParallelIndexSupervisorTask task = runTestTask(Intervals.of("2017-12/P1M"), Granularities.DAY, false, Collections.emptyList());
    Map<String, Object> actualReports = task.doGetLiveReports("full");
    Map<String, Object> expectedReports = getExpectedTaskReportParallel(
        task.getId(),
        ImmutableList.of(
            new ParseExceptionReport("{ts=2017unparseable}", "unparseable", ImmutableList.of(getErrorMessageForUnparseableTimestamp()), 1L),
            new ParseExceptionReport("{ts=2017-12-25, dim=0 th test file, val=badval}", "processedWithError", ImmutableList.of("Unable to parse value[badval] for field[val]"), 1L)
        ),
        new RowIngestionMetersTotals(10, 1, 1, 1)
    );
    compareTaskReports(expectedReports, actualReports);
}
Also used: ParseExceptionReport (org.apache.druid.segment.incremental.ParseExceptionReport), RowIngestionMetersTotals (org.apache.druid.segment.incremental.RowIngestionMetersTotals), Test (org.junit.Test)
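
The RowIngestionMetersTotals(10, 1, 1, 1) passed above encodes the expected row counts; the argument order (processed, processedWithError, thrownAway, unparseable) can be read off the constructor call near the end of Example 3 below. Annotated for clarity:

// Argument order inferred from Example 3, which builds
// new RowIngestionMetersTotals(processed, processedWithError, thrownAway, unparseable).
RowIngestionMetersTotals expectedTotals = new RowIngestionMetersTotals(
    10, // processed: rows ingested cleanly
    1,  // processedWithError: rows kept despite a field-level parse error
    1,  // thrownAway: rows filtered out
    1   // unparseable: rows that could not be parsed at all
);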

Example 3 with ParseExceptionReport

Use of org.apache.druid.segment.incremental.ParseExceptionReport in project druid by druid-io.

From class ParallelIndexSupervisorTask, the method doGetRowStatsAndUnparseableEventsParallelSinglePhase:

private Pair<Map<String, Object>, Map<String, Object>> doGetRowStatsAndUnparseableEventsParallelSinglePhase(SinglePhaseParallelIndexTaskRunner parallelSinglePhaseRunner, boolean includeUnparseable) {
    long processed = 0L;
    long processedWithError = 0L;
    long thrownAway = 0L;
    long unparseable = 0L;
    List<ParseExceptionReport> unparseableEvents = new ArrayList<>();
    // Get stats from completed tasks
    Map<String, PushedSegmentsReport> completedSubtaskReports = parallelSinglePhaseRunner.getReports();
    for (PushedSegmentsReport pushedSegmentsReport : completedSubtaskReports.values()) {
        Map<String, TaskReport> taskReport = pushedSegmentsReport.getTaskReport();
        if (taskReport == null || taskReport.isEmpty()) {
            LOG.warn("Got an empty task report from subtask: " + pushedSegmentsReport.getTaskId());
            continue;
        }
        IngestionStatsAndErrorsTaskReport ingestionStatsAndErrorsReport = (IngestionStatsAndErrorsTaskReport) taskReport.get(IngestionStatsAndErrorsTaskReport.REPORT_KEY);
        IngestionStatsAndErrorsTaskReportData reportData = (IngestionStatsAndErrorsTaskReportData) ingestionStatsAndErrorsReport.getPayload();
        RowIngestionMetersTotals totals = getTotalsFromBuildSegmentsRowStats(reportData.getRowStats().get(RowIngestionMeters.BUILD_SEGMENTS));
        if (includeUnparseable) {
            List<ParseExceptionReport> taskUnparseableEvents = (List<ParseExceptionReport>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);
            unparseableEvents.addAll(taskUnparseableEvents);
        }
        processed += totals.getProcessed();
        processedWithError += totals.getProcessedWithError();
        thrownAway += totals.getThrownAway();
        unparseable += totals.getUnparseable();
    }
    // Get stats from running tasks
    Set<String> runningTaskIds = parallelSinglePhaseRunner.getRunningTaskIds();
    for (String runningTaskId : runningTaskIds) {
        try {
            Map<String, Object> report = toolbox.getIndexingServiceClient().getTaskReport(runningTaskId);
            if (report == null || report.isEmpty()) {
                // task does not have a running report yet
                continue;
            }
            Map<String, Object> ingestionStatsAndErrors = (Map<String, Object>) report.get("ingestionStatsAndErrors");
            Map<String, Object> payload = (Map<String, Object>) ingestionStatsAndErrors.get("payload");
            Map<String, Object> rowStats = (Map<String, Object>) payload.get("rowStats");
            Map<String, Object> totals = (Map<String, Object>) rowStats.get("totals");
            Map<String, Object> buildSegments = (Map<String, Object>) totals.get(RowIngestionMeters.BUILD_SEGMENTS);
            if (includeUnparseable) {
                Map<String, Object> taskUnparseableEvents = (Map<String, Object>) payload.get("unparseableEvents");
                List<ParseExceptionReport> buildSegmentsUnparseableEvents = (List<ParseExceptionReport>) taskUnparseableEvents.get(RowIngestionMeters.BUILD_SEGMENTS);
                unparseableEvents.addAll(buildSegmentsUnparseableEvents);
            }
            processed += ((Number) buildSegments.get("processed")).longValue();
            processedWithError += ((Number) buildSegments.get("processedWithError")).longValue();
            thrownAway += ((Number) buildSegments.get("thrownAway")).longValue();
            unparseable += ((Number) buildSegments.get("unparseable")).longValue();
        } catch (Exception e) {
            LOG.warn(e, "Encountered exception when getting live subtask report for task: " + runningTaskId);
        }
    }
    Map<String, Object> rowStatsMap = new HashMap<>();
    Map<String, Object> totalsMap = new HashMap<>();
    totalsMap.put(RowIngestionMeters.BUILD_SEGMENTS, new RowIngestionMetersTotals(processed, processedWithError, thrownAway, unparseable));
    rowStatsMap.put("totals", totalsMap);
    return Pair.of(rowStatsMap, ImmutableMap.of(RowIngestionMeters.BUILD_SEGMENTS, unparseableEvents));
}
Also used: IngestionStatsAndErrorsTaskReport (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport), TaskReport (org.apache.druid.indexing.common.TaskReport), IngestionStatsAndErrorsTaskReportData (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData), HashMap (java.util.HashMap), ParseExceptionReport (org.apache.druid.segment.incremental.ParseExceptionReport), ArrayList (java.util.ArrayList), IOException (java.io.IOException), MaxAllowedLocksExceededException (org.apache.druid.indexing.common.task.batch.MaxAllowedLocksExceededException), List (java.util.List), RowIngestionMetersTotals (org.apache.druid.segment.incremental.RowIngestionMetersTotals), Map (java.util.Map), TreeMap (java.util.TreeMap), ImmutableMap (com.google.common.collect.ImmutableMap)
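
The chain of casts and lookups in the running-task branch implies the nested shape of a subtask's live report. A sketch of that shape using Guava map literals (the keys come from the lookups above; the numbers and the empty event list are placeholders):

// Shape implied by the report.get(...) chain above; values are illustrative only.
// (Uses com.google.common.collect.ImmutableList/ImmutableMap and
// org.apache.druid.segment.incremental.RowIngestionMeters.)
Map<String, Object> reportShape = ImmutableMap.<String, Object>of(
    "ingestionStatsAndErrors", ImmutableMap.of(
        "payload", ImmutableMap.of(
            "rowStats", ImmutableMap.of(
                "totals", ImmutableMap.of(
                    RowIngestionMeters.BUILD_SEGMENTS,
                    ImmutableMap.of("processed", 10, "processedWithError", 1, "thrownAway", 1, "unparseable", 1)
                )
            ),
            "unparseableEvents", ImmutableMap.of(
                // list of ParseExceptionReport entries; empty here
                RowIngestionMeters.BUILD_SEGMENTS, ImmutableList.of()
            )
        )
    )
);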

Example 4 with ParseExceptionReport

Use of org.apache.druid.segment.incremental.ParseExceptionReport in project druid by druid-io.

From class KafkaSupervisorTest, the method testGetCurrentParseErrors:

@Test
public void testGetCurrentParseErrors() {
    supervisor = getTestableSupervisor(1, 2, true, "PT1H", null, null, false, kafkaHost);
    supervisor.addTaskGroupToActivelyReadingTaskGroup(
        supervisor.getTaskGroupIdForPartition(0), ImmutableMap.of(0, 0L),
        Optional.absent(), Optional.absent(), ImmutableSet.of("task1"), ImmutableSet.of()
    );
    supervisor.addTaskGroupToPendingCompletionTaskGroup(
        supervisor.getTaskGroupIdForPartition(1), ImmutableMap.of(0, 0L),
        Optional.absent(), Optional.absent(), ImmutableSet.of("task2"), ImmutableSet.of()
    );
    ParseExceptionReport exception1 = new ParseExceptionReport("testInput1", "unparseable", ImmutableList.of("detail1", "detail2"), 1000L);
    ParseExceptionReport exception2 = new ParseExceptionReport("testInput2", "unparseable", ImmutableList.of("detail1", "detail2"), 2000L);
    ParseExceptionReport exception3 = new ParseExceptionReport("testInput3", "unparseable", ImmutableList.of("detail1", "detail2"), 3000L);
    ParseExceptionReport exception4 = new ParseExceptionReport("testInput4", "unparseable", ImmutableList.of("detail1", "detail2"), 4000L);
    EasyMock.expect(taskClient.getParseErrorsAsync("task1")).andReturn(Futures.immediateFuture(ImmutableList.of(exception1, exception2))).times(1);
    EasyMock.expect(taskClient.getParseErrorsAsync("task2")).andReturn(Futures.immediateFuture(ImmutableList.of(exception3, exception4))).times(1);
    replayAll();
    List<ParseExceptionReport> errors = supervisor.getParseErrors();
    verifyAll();
    Assert.assertEquals(ImmutableList.of(exception4, exception3, exception2, exception1), errors);
}
Also used: ParseExceptionReport (org.apache.druid.segment.incremental.ParseExceptionReport), Test (org.junit.Test)
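
Note the expected order in the assertion: errors come back newest-first (exception4 has the largest timestamp, 4000L). A minimal illustration of that ordering, assuming a getTimeOfExceptionMillis() accessor (the accessor name is an assumption; Example 5 shows the supervisor-side mechanism that produces this order):

// Sorting newest-first by timestamp reproduces the asserted order.
// getTimeOfExceptionMillis() is an assumed accessor name.
// (Uses java.util.ArrayList and java.util.Comparator.)
List<ParseExceptionReport> sorted = new ArrayList<>(ImmutableList.of(exception1, exception2, exception3, exception4));
sorted.sort(Comparator.comparingLong(ParseExceptionReport::getTimeOfExceptionMillis).reversed());
Assert.assertEquals(ImmutableList.of(exception4, exception3, exception2, exception1), sorted);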

Example 5 with ParseExceptionReport

Use of org.apache.druid.segment.incremental.ParseExceptionReport in project druid by druid-io.

From class SeekableStreamSupervisor, the method getCurrentParseErrors:

/**
 * Collect parse errors from all tasks managed by this supervisor.
 *
 * @return a list of the most recent parse exception reports, newest first
 *
 * @throws InterruptedException if interrupted while waiting for task responses
 * @throws ExecutionException if fetching a task's parse errors fails
 * @throws TimeoutException if task responses do not arrive within the configured timeout
 */
private List<ParseExceptionReport> getCurrentParseErrors() throws InterruptedException, ExecutionException, TimeoutException {
    final List<ListenableFuture<ErrorsFromTaskResult>> futures = new ArrayList<>();
    final List<Pair<Integer, String>> groupAndTaskIds = new ArrayList<>();
    for (int groupId : activelyReadingTaskGroups.keySet()) {
        TaskGroup group = activelyReadingTaskGroups.get(groupId);
        for (String taskId : group.taskIds()) {
            futures.add(Futures.transform(
                taskClient.getParseErrorsAsync(taskId),
                (Function<List<ParseExceptionReport>, ErrorsFromTaskResult>) (taskErrors) ->
                    new ErrorsFromTaskResult(groupId, taskId, taskErrors)
            ));
            groupAndTaskIds.add(new Pair<>(groupId, taskId));
        }
    }
    for (int groupId : pendingCompletionTaskGroups.keySet()) {
        List<TaskGroup> pendingGroups = pendingCompletionTaskGroups.get(groupId);
        for (TaskGroup pendingGroup : pendingGroups) {
            for (String taskId : pendingGroup.taskIds()) {
                futures.add(Futures.transform(
                    taskClient.getParseErrorsAsync(taskId),
                    (Function<List<ParseExceptionReport>, ErrorsFromTaskResult>) (taskErrors) ->
                        new ErrorsFromTaskResult(groupId, taskId, taskErrors)
                ));
                groupAndTaskIds.add(new Pair<>(groupId, taskId));
            }
        }
    }
    // We use a tree set to sort the parse errors by time, and eliminate duplicates across calls to this method
    TreeSet<ParseExceptionReport> parseErrorsTreeSet = new TreeSet<>(PARSE_EXCEPTION_REPORT_COMPARATOR);
    parseErrorsTreeSet.addAll(lastKnownParseErrors);
    List<ErrorsFromTaskResult> results = Futures.successfulAsList(futures).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
    for (int i = 0; i < results.size(); i++) {
        ErrorsFromTaskResult result = results.get(i);
        if (result != null) {
            parseErrorsTreeSet.addAll(result.getErrors());
        } else {
            Pair<Integer, String> groupAndTaskId = groupAndTaskIds.get(i);
            log.error("Failed to get errors for group[%d]-task[%s]", groupAndTaskId.lhs, groupAndTaskId.rhs);
        }
    }
    // Store a limited number of parse exceptions, keeping the most recent ones.
    SeekableStreamIndexTaskTuningConfig taskTuningConfig = spec.getSpec().getTuningConfig().convertToTaskTuningConfig();
    SeekableStreamSupervisorIOConfig ioConfig = spec.getSpec().getIOConfig();
    int parseErrorLimit = taskTuningConfig.getMaxSavedParseExceptions() * ioConfig.getTaskCount();
    parseErrorLimit = Math.min(parseErrorLimit, parseErrorsTreeSet.size());
    final List<ParseExceptionReport> limitedParseErrors = new ArrayList<>();
    Iterator<ParseExceptionReport> descendingIterator = parseErrorsTreeSet.descendingIterator();
    for (int i = 0; i < parseErrorLimit; i++) {
        limitedParseErrors.add(descendingIterator.next());
    }
    return limitedParseErrors;
}
Also used: CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList), ArrayList (java.util.ArrayList), ParseExceptionReport (org.apache.druid.segment.incremental.ParseExceptionReport), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), Function (com.google.common.base.Function), SeekableStreamIndexTaskTuningConfig (org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskTuningConfig), TreeSet (java.util.TreeSet), ListenableFuture (com.google.common.util.concurrent.ListenableFuture), Pair (org.apache.druid.java.util.common.Pair)
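
PARSE_EXCEPTION_REPORT_COMPARATOR is referenced above but not shown. A plausible reconstruction, assuming getTimeOfExceptionMillis() and getInput() accessors (names assumed): order primarily by exception time, so the descending iterator yields the newest errors first, with a tie-breaker on the input so the TreeSet does not collapse distinct reports that share a timestamp:

// Hypothetical reconstruction; the real comparator is defined elsewhere in
// SeekableStreamSupervisor and may use different fields or tie-breakers.
private static final Comparator<ParseExceptionReport> PARSE_EXCEPTION_REPORT_COMPARATOR =
    Comparator.comparingLong(ParseExceptionReport::getTimeOfExceptionMillis)
              .thenComparing(ParseExceptionReport::getInput);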

Aggregations

ParseExceptionReport (org.apache.druid.segment.incremental.ParseExceptionReport): 5 usages
RowIngestionMetersTotals (org.apache.druid.segment.incremental.RowIngestionMetersTotals): 3 usages
Test (org.junit.Test): 3 usages
ArrayList (java.util.ArrayList): 2 usages
Function (com.google.common.base.Function): 1 usage
ImmutableMap (com.google.common.collect.ImmutableMap): 1 usage
ListenableFuture (com.google.common.util.concurrent.ListenableFuture): 1 usage
IOException (java.io.IOException): 1 usage
HashMap (java.util.HashMap): 1 usage
List (java.util.List): 1 usage
Map (java.util.Map): 1 usage
TreeMap (java.util.TreeMap): 1 usage
TreeSet (java.util.TreeSet): 1 usage
CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList): 1 usage
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 1 usage
IngestionStatsAndErrorsTaskReport (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport): 1 usage
IngestionStatsAndErrorsTaskReportData (org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData): 1 usage
TaskReport (org.apache.druid.indexing.common.TaskReport): 1 usage
MaxAllowedLocksExceededException (org.apache.druid.indexing.common.task.batch.MaxAllowedLocksExceededException): 1 usage
SeekableStreamIndexTaskTuningConfig (org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskTuningConfig): 1 usage