Use of org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData in project druid by druid-io.
From the class AppenderatorDriverRealtimeIndexTaskTest, the method testMultipleParseExceptionsFailure:
@Test(timeout = 60_000L)
public void testMultipleParseExceptionsFailure() throws Exception {
expectPublishedSegments(1);
final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, TransformSpec.NONE, false, 0, true, 3, 10);
final ListenableFuture<TaskStatus> statusFuture = runTask(task);
// Wait for firehose to show up, it starts off null.
while (task.getFirehose() == null) {
Thread.sleep(50);
}
final TestFirehose firehose = (TestFirehose) task.getFirehose();
firehose.addRows(Arrays.asList(
    // Good row- will be processed.
    ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "met1", "1"),
    // Null row- will be thrown away.
    null,
    // Bad metric- will count as processed, but that particular metric won't update.
    ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "met1", "foo"),
    // Bad long dim- will count as processed, but bad dims will get default values.
    ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "dimLong", "notnumber", "dimFloat", "notnumber", "met1", "foo"),
    // Bad row- will be unparseable.
    ImmutableMap.of("dim1", "foo", "met1", 2.0, FAIL_DIM, "x"),
    // Good row- will be processed.
    ImmutableMap.of("t", 1521251960729L, "dim2", "bar", "met1", 2.0)));
// Stop the firehose, this will drain out existing events.
firehose.close();
// Wait for the task to finish.
final TaskStatus taskStatus = statusFuture.get();
Assert.assertEquals(TaskState.FAILED, taskStatus.getStatusCode());
Assert.assertTrue(taskStatus.getErrorMsg().contains("Max parse exceptions[3] exceeded"));
IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData();
Map<String, Object> expectedMetrics = ImmutableMap.of(
    RowIngestionMeters.BUILD_SEGMENTS,
    ImmutableMap.of(
        RowIngestionMeters.PROCESSED, 1,
        RowIngestionMeters.PROCESSED_WITH_ERROR, 2,
        RowIngestionMeters.UNPARSEABLE, 2,
        RowIngestionMeters.THROWN_AWAY, 0));
Assert.assertEquals(expectedMetrics, reportData.getRowStats());
List<LinkedHashMap> parseExceptionReports = (List<LinkedHashMap>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);
List<String> expectedMessages = ImmutableList.of(
    "Timestamp[null] is unparseable! Event: {dim1=foo, met1=2.0, __fail__=x}",
    "could not convert value [notnumber] to long",
    "Unable to parse value[foo] for field[met1]",
    "Timestamp[null] is unparseable! Event: null");
List<String> actualMessages = parseExceptionReports.stream()
    .map((r) -> ((List<String>) r.get("details")).get(0))
    .collect(Collectors.toList());
Assert.assertEquals(expectedMessages, actualMessages);
List<String> expectedInputs = Arrays.asList(
    "{dim1=foo, met1=2.0, __fail__=x}",
    "{t=1521251960729, dim1=foo, dimLong=notnumber, dimFloat=notnumber, met1=foo}",
    "{t=1521251960729, dim1=foo, met1=foo}",
    null);
List<String> actualInputs = parseExceptionReports.stream()
    .map((r) -> (String) r.get("input"))
    .collect(Collectors.toList());
Assert.assertEquals(expectedInputs, actualInputs);
Assert.assertEquals(IngestionState.BUILD_SEGMENTS, reportData.getIngestionState());
}
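The assertions above rely on a getTaskReportData() helper that is defined elsewhere in the test class (the Kafka example below uses the same pattern). A minimal sketch of such a helper is shown here, assuming the test fixture exposes the file the task report was written to (reportsFile) and a configured Jackson ObjectMapper (OBJECT_MAPPER); both names are assumptions for illustration, not part of the code above. It deserializes the report map and extracts the IngestionStatsAndErrorsTaskReportData payload:
// Sketch only: reportsFile and OBJECT_MAPPER are assumed to come from the test fixture.
private IngestionStatsAndErrorsTaskReportData getTaskReportData() throws IOException
{
  Map<String, TaskReport> taskReports = OBJECT_MAPPER.readValue(
      reportsFile,
      new TypeReference<Map<String, TaskReport>>()
      {
      }
  );
  return IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports(taskReports);
}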
Use of org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData in project druid by druid-io.
From the class KafkaIndexTaskTest, the method testMultipleParseExceptionsSuccess:
@Test(timeout = 60_000L)
public void testMultipleParseExceptionsSuccess() throws Exception {
reportParseExceptions = false;
maxParseExceptions = 6;
maxSavedParseExceptions = 6;
// Insert data
insertData();
final KafkaIndexTask task = createTask(null, new KafkaIndexTaskIOConfig(
    0,
    "sequence0",
    new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 2L), ImmutableSet.of()),
    new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 13L)),
    kafkaServer.consumerProperties(),
    KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS,
    true,
    null,
    null,
    INPUT_FORMAT));
final ListenableFuture<TaskStatus> future = runTask(task);
// Wait for the task to exit.
TaskStatus status = future.get();
Assert.assertEquals(TaskState.SUCCESS, status.getStatusCode());
Assert.assertNull(status.getErrorMsg());
// Check metrics
Assert.assertEquals(4, task.getRunner().getRowIngestionMeters().getProcessed());
Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getProcessedWithError());
Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getUnparseable());
Assert.assertEquals(1, task.getRunner().getRowIngestionMeters().getThrownAway());
// Check published metadata
assertEqualsExceptVersion(
    ImmutableList.of(sdd("2010/P1D", 0), sdd("2011/P1D", 0), sdd("2013/P1D", 0), sdd("2049/P1D", 0)),
    publishedDescriptors());
Assert.assertEquals(
    new KafkaDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 13L))),
    newDataSchemaMetadata());
IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData();
Map<String, Object> expectedMetrics = ImmutableMap.of(
    RowIngestionMeters.BUILD_SEGMENTS,
    ImmutableMap.of(
        RowIngestionMeters.PROCESSED, 4,
        RowIngestionMeters.PROCESSED_WITH_ERROR, 3,
        RowIngestionMeters.UNPARSEABLE, 3,
        RowIngestionMeters.THROWN_AWAY, 1));
Assert.assertEquals(expectedMetrics, reportData.getRowStats());
List<LinkedHashMap> parseExceptionReports = (List<LinkedHashMap>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);
List<String> expectedMessages = Arrays.asList(
    "Unable to parse value[notanumber] for field[met1]",
    "could not convert value [notanumber] to float",
    "could not convert value [notanumber] to long",
    "Unable to parse [] as the intermediateRow resulted in empty input row (Record: 1)",
    "Unable to parse row [unparseable] (Record: 1)",
    "Encountered row with timestamp[246140482-04-24T15:36:27.903Z] that cannot be represented as a long: [{timestamp=246140482-04-24T15:36:27.903Z, dim1=x, dim2=z, dimLong=10, dimFloat=20.0, met1=1.0}] (Record: 1)");
List<String> actualMessages = parseExceptionReports.stream()
    .map((r) -> ((List<String>) r.get("details")).get(0))
    .collect(Collectors.toList());
Assert.assertEquals(expectedMessages, actualMessages);
List<String> expectedInputs = Arrays.asList(
    "{timestamp=2049, dim1=f, dim2=y, dimLong=10, dimFloat=20.0, met1=notanumber}",
    "{timestamp=2049, dim1=f, dim2=y, dimLong=10, dimFloat=notanumber, met1=1.0}",
    "{timestamp=2049, dim1=f, dim2=y, dimLong=notanumber, dimFloat=20.0, met1=1.0}",
    "",
    "unparseable",
    "{timestamp=246140482-04-24T15:36:27.903Z, dim1=x, dim2=z, dimLong=10, dimFloat=20.0, met1=1.0}");
List<String> actualInputs = parseExceptionReports.stream()
    .map((r) -> (String) r.get("input"))
    .collect(Collectors.toList());
Assert.assertEquals(expectedInputs, actualInputs);
}
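Both tests read the saved parse exceptions from getUnparseableEvents() as raw LinkedHashMap entries and pull out the "details" and "input" keys by hand. A small pair of helpers along these lines (a sketch, not part of the Druid test code; the method names are hypothetical) would let the two report-checking blocks above share the extraction logic:
// Sketch: extract the first detail message from each saved parse-exception report entry.
private static List<String> extractFirstDetails(List<LinkedHashMap> parseExceptionReports)
{
  return parseExceptionReports.stream()
      .map((r) -> ((List<String>) r.get("details")).get(0))
      .collect(Collectors.toList());
}

// Sketch: extract the raw input string from each saved parse-exception report entry.
private static List<String> extractInputs(List<LinkedHashMap> parseExceptionReports)
{
  return parseExceptionReports.stream()
      .map((r) -> (String) r.get("input"))
      .collect(Collectors.toList());
}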