Use of org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers in project druid by druid-io: class KafkaIndexTaskTest, method testMultipleParseExceptionsFailure.
@Test(timeout = 60_000L)
public void testMultipleParseExceptionsFailure() throws Exception {
reportParseExceptions = false;
maxParseExceptions = 2;
maxSavedParseExceptions = 2;
// Insert data
insertData();
final KafkaIndexTask task = createTask(null, new KafkaIndexTaskIOConfig(0, "sequence0", new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 2L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 10L)), kafkaServer.consumerProperties(), KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, null, INPUT_FORMAT));
final ListenableFuture<TaskStatus> future = runTask(task);
TaskStatus status = future.get();
// Wait for task to exit
Assert.assertEquals(TaskState.FAILED, status.getStatusCode());
IndexTaskTest.checkTaskStatusErrorMsgForParseExceptionsExceeded(status);
// Check metrics
Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getProcessed());
Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getProcessedWithError());
Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getUnparseable());
Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getThrownAway());
// Check published metadata
Assert.assertEquals(ImmutableList.of(), publishedDescriptors());
Assert.assertNull(newDataSchemaMetadata());
IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData();
Map<String, Object> expectedMetrics = ImmutableMap.of(
    RowIngestionMeters.BUILD_SEGMENTS,
    ImmutableMap.of(
        RowIngestionMeters.PROCESSED, 3,
        RowIngestionMeters.PROCESSED_WITH_ERROR, 0,
        RowIngestionMeters.UNPARSEABLE, 3,
        RowIngestionMeters.THROWN_AWAY, 0
    )
);
Assert.assertEquals(expectedMetrics, reportData.getRowStats());
List<LinkedHashMap> parseExceptionReports = (List<LinkedHashMap>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);
List<String> expectedMessages = Arrays.asList(
    "Unable to parse [] as the intermediateRow resulted in empty input row (Record: 1)",
    "Unable to parse row [unparseable] (Record: 1)"
);
List<String> actualMessages = parseExceptionReports.stream().map((r) -> {
return ((List<String>) r.get("details")).get(0);
}).collect(Collectors.toList());
Assert.assertEquals(expectedMessages, actualMessages);
List<String> expectedInputs = Arrays.asList("", "unparseable");
List<String> actualInputs = parseExceptionReports.stream().map((r) -> {
return (String) r.get("input");
}).collect(Collectors.toList());
Assert.assertEquals(expectedInputs, actualInputs);
}
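For reference, the start-sequence object used in the IO config above is a plain value class. The following is a minimal sketch of constructing and inspecting one on its own; the accessor names (getStream, getPartitionSequenceNumberMap, getExclusivePartitions) are taken from the Druid codebase, and the topic name is illustrative.
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers;

// Start offsets: read partition 0 beginning at offset 2. The offset is inclusive
// here because the exclusive-partition set is empty. Type parameters are
// <partitionId, sequenceNumber/offset>.
SeekableStreamStartSequenceNumbers<Integer, Long> start =
    new SeekableStreamStartSequenceNumbers<>("topic", ImmutableMap.of(0, 2L), ImmutableSet.of());

String stream = start.getStream();                                      // "topic"
Long partition0Offset = start.getPartitionSequenceNumberMap().get(0);   // 2L
boolean exclusive = start.getExclusivePartitions().contains(0);         // false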
Use of org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers in project druid by druid-io: class KafkaIndexTaskTest, method testKafkaInputFormat.
@Test(timeout = 60_000L)
public void testKafkaInputFormat() throws Exception {
// Insert data
insertData(Iterables.limit(records, 3));
final KafkaIndexTask task = createTask(null, new DataSchema("test_ds", new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(Arrays.asList(new StringDimensionSchema("dim1"), new StringDimensionSchema("dim1t"), new StringDimensionSchema("dim2"), new LongDimensionSchema("dimLong"), new FloatDimensionSchema("dimFloat"), new StringDimensionSchema("kafka.testheader.encoding"))), new AggregatorFactory[] { new DoubleSumAggregatorFactory("met1sum", "met1"), new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null), null), new KafkaIndexTaskIOConfig(0, "sequence0", new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 0L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 5L)), kafkaServer.consumerProperties(), KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, null, KAFKA_INPUT_FORMAT));
Assert.assertTrue(task.supportsQueries());
final ListenableFuture<TaskStatus> future = runTask(task);
while (countEvents(task) != 3) {
Thread.sleep(25);
}
Assert.assertEquals(Status.READING, task.getRunner().getStatus());
final QuerySegmentSpec interval = OBJECT_MAPPER.readValue("\"2008/2012\"", QuerySegmentSpec.class);
List<ScanResultValue> scanResultValues = scanData(task, interval);
// verify that the three records inserted so far have been indexed and are queryable
Assert.assertEquals(3, Iterables.size(scanResultValues));
for (ScanResultValue result : scanResultValues) {
final Map<String, Object> event = ((List<Map<String, Object>>) result.getEvents()).get(0);
Assert.assertEquals("application/json", event.get("kafka.testheader.encoding"));
Assert.assertEquals("y", event.get("dim2"));
}
// insert remaining data
insertData(Iterables.skip(records, 3));
// Wait for task to exit
Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode());
// Check metrics
Assert.assertEquals(4, task.getRunner().getRowIngestionMeters().getProcessed());
Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getUnparseable());
Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getThrownAway());
}
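The kafka.testheader.encoding dimension asserted above comes from a Kafka record header that the Kafka input format exposes as a column. Below is a minimal, illustrative sketch of producing such a record with the standard kafka-clients API; the topic name, the JSON payload, and the assumption that KAFKA_INPUT_FORMAT maps headers under a "kafka.testheader." prefix are taken from this test rather than shown here.
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.header.Header;
import org.apache.kafka.common.header.internals.RecordHeader;

// A header named "encoding" would surface as the dimension
// "kafka.testheader.encoding" once the input format prefixes header keys
// with "kafka.testheader." (an assumption based on the assertions above).
Header encodingHeader =
    new RecordHeader("encoding", "application/json".getBytes(StandardCharsets.UTF_8));
ProducerRecord<byte[], byte[]> record = new ProducerRecord<>(
    "topic",                           // topic
    0,                                 // partition
    null,                              // timestamp (let the broker assign one)
    null,                              // key
    "{\"timestamp\":\"2009-01-01\",\"dim1\":\"a\",\"dim2\":\"y\"}".getBytes(StandardCharsets.UTF_8),
    Collections.singletonList(encodingHeader)
);
// A KafkaProducer<byte[], byte[]> would publish it via producer.send(record).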
Use of org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers in project druid by druid-io: class KafkaIndexTaskTest, method testRunOneTaskTwoPartitions.
@Test(timeout = 60_000L)
public void testRunOneTaskTwoPartitions() throws Exception {
final KafkaIndexTask task = createTask(null, new KafkaIndexTaskIOConfig(0, "sequence0", new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 2L, 1, 0L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 5L, 1, 2L)), kafkaServer.consumerProperties(), KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, null, INPUT_FORMAT));
final ListenableFuture<TaskStatus> future = runTask(task);
// Insert data
insertData();
// Wait for tasks to exit
Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode());
// Check metrics
Assert.assertEquals(5, task.getRunner().getRowIngestionMeters().getProcessed());
Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getUnparseable());
Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getThrownAway());
// Check published segments & metadata
SegmentDescriptorAndExpectedDim1Values desc1 = sdd("2010/P1D", 0, ImmutableList.of("c"));
SegmentDescriptorAndExpectedDim1Values desc2 = sdd("2011/P1D", 0, ImmutableList.of("d", "e", "h"));
// desc3 will not be created in KafkaIndexTask (0.12.x) as it does not create per Kafka partition Druid segments
@SuppressWarnings("unused") SegmentDescriptor desc3 = sd("2011/P1D", 1);
SegmentDescriptorAndExpectedDim1Values desc4 = sdd("2012/P1D", 0, ImmutableList.of("g"));
assertEqualsExceptVersion(ImmutableList.of(desc1, desc2, desc4), publishedDescriptors());
Assert.assertEquals(new KafkaDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 5L, 1, 2L))), newDataSchemaMetadata());
}
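The expected processed count of 5 follows directly from the per-partition offset ranges in the IO config, assuming every record in those ranges parses cleanly. A small sketch of that arithmetic (ordinary Java, nothing Druid-specific):
import java.util.Map;
import com.google.common.collect.ImmutableMap;

Map<Integer, Long> startOffsets = ImmutableMap.of(0, 2L, 1, 0L);
Map<Integer, Long> endOffsets = ImmutableMap.of(0, 5L, 1, 2L);

// Each partition contributes (end - start) records: (5 - 2) + (2 - 0) = 5,
// which matches the getProcessed() assertion above.
long expectedRecords = endOffsets.entrySet().stream()
    .mapToLong(e -> e.getValue() - startOffsets.get(e.getKey()))
    .sum();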
Use of org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers in project druid by druid-io: class KafkaIndexTaskTest, method testRunWithTransformSpec.
@Test(timeout = 60_000L)
public void testRunWithTransformSpec() throws Exception {
final KafkaIndexTask task = createTask(
    null,
    NEW_DATA_SCHEMA.withTransformSpec(
        new TransformSpec(
            new SelectorDimFilter("dim1", "b", null),
            ImmutableList.of(new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil()))
        )
    ),
    new KafkaIndexTaskIOConfig(
        0,
        "sequence0",
        new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 0L), ImmutableSet.of()),
        new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 5L)),
        kafkaServer.consumerProperties(),
        KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS,
        true,
        null,
        null,
        INPUT_FORMAT
    )
);
final ListenableFuture<TaskStatus> future = runTask(task);
// Wait for the task to start reading
while (task.getRunner().getStatus() != Status.READING) {
Thread.sleep(10);
}
// Insert data
insertData();
// Wait for task to exit
Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode());
// Check metrics
Assert.assertEquals(1, task.getRunner().getRowIngestionMeters().getProcessed());
Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getUnparseable());
Assert.assertEquals(4, task.getRunner().getRowIngestionMeters().getThrownAway());
// Check published metadata
final List<SegmentDescriptor> publishedDescriptors = publishedDescriptors();
assertEqualsExceptVersion(ImmutableList.of(sdd("2009/P1D", 0)), publishedDescriptors);
Assert.assertEquals(new KafkaDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 5L))), newDataSchemaMetadata());
// Check segments in deep storage
Assert.assertEquals(ImmutableList.of("b"), readSegmentColumn("dim1", publishedDescriptors.get(0)));
Assert.assertEquals(ImmutableList.of("bb"), readSegmentColumn("dim1t", publishedDescriptors.get(0)));
}
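For context, the transform spec built above has two independent parts: a filter that drops rows during ingestion and an expression transform that derives a new column. A minimal sketch of constructing it on its own follows; it uses the same classes as the test, and the row-count comments simply restate the assertions above.
import com.google.common.collect.ImmutableList;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.filter.SelectorDimFilter;
import org.apache.druid.segment.transform.ExpressionTransform;
import org.apache.druid.segment.transform.TransformSpec;

TransformSpec transformSpec = new TransformSpec(
    // Filter: only rows where dim1 == "b" are ingested; the filtered-out rows
    // are counted as thrownAway (4 in this test), not as unparseable.
    new SelectorDimFilter("dim1", "b", null),
    // Transform: derive dim1t = concat(dim1, dim1), so dim1 "b" yields dim1t "bb",
    // which is what readSegmentColumn("dim1t", ...) verifies above.
    ImmutableList.of(new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil()))
);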
Use of org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers in project druid by druid-io: class KafkaIndexTaskTest, method testSerde.
@Test
public void testSerde() throws Exception {
// This is both a serde test and a regression test for https://github.com/apache/druid/issues/7724.
final KafkaIndexTask task = createTask("taskid", NEW_DATA_SCHEMA.withTransformSpec(new TransformSpec(null, ImmutableList.of(new ExpressionTransform("beep", "nofunc()", ExprMacroTable.nil())))), new KafkaIndexTaskIOConfig(0, "sequence", new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of()), ImmutableMap.of(), KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, null, INPUT_FORMAT));
final Task task1 = OBJECT_MAPPER.readValue(OBJECT_MAPPER.writeValueAsBytes(task), Task.class);
Assert.assertEquals(task, task1);
}
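The round-trip assertEquals in this test works because the task and its nested configs compare by value after deserialization. A small illustrative sketch of that property at the level of the start sequence numbers themselves, assuming the class's value-based equals as used throughout Druid's tests:
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers;

// Two instances built from equal inputs (here: empty partition maps, as in the
// serde test above) are expected to compare equal, which is what lets the
// task-level assertEquals succeed after a Jackson round trip.
SeekableStreamStartSequenceNumbers<Integer, Long> a =
    new SeekableStreamStartSequenceNumbers<>("topic", ImmutableMap.of(), ImmutableSet.of());
SeekableStreamStartSequenceNumbers<Integer, Long> b =
    new SeekableStreamStartSequenceNumbers<>("topic", ImmutableMap.of(), ImmutableSet.of());
boolean sameValue = a.equals(b);  // expected true if equals is value-based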