Use of org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers in project druid by druid-io.
From the class KafkaIndexTaskTest, method testMultipleParseExceptionsSuccess.
@Test(timeout = 60_000L)
public void testMultipleParseExceptionsSuccess() throws Exception {
  reportParseExceptions = false;
  maxParseExceptions = 6;
  maxSavedParseExceptions = 6;

  // Insert data
  insertData();

  final KafkaIndexTask task = createTask(
      null,
      new KafkaIndexTaskIOConfig(
          0, "sequence0",
          new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 2L), ImmutableSet.of()),
          new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 13L)),
          kafkaServer.consumerProperties(), KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS,
          true, null, null, INPUT_FORMAT
      )
  );
  final ListenableFuture<TaskStatus> future = runTask(task);

  // Wait for task to exit
  TaskStatus status = future.get();
  Assert.assertEquals(TaskState.SUCCESS, status.getStatusCode());
  Assert.assertNull(status.getErrorMsg());

  // Check metrics
  Assert.assertEquals(4, task.getRunner().getRowIngestionMeters().getProcessed());
  Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getProcessedWithError());
  Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getUnparseable());
  Assert.assertEquals(1, task.getRunner().getRowIngestionMeters().getThrownAway());

  // Check published metadata
  assertEqualsExceptVersion(
      ImmutableList.of(sdd("2010/P1D", 0), sdd("2011/P1D", 0), sdd("2013/P1D", 0), sdd("2049/P1D", 0)),
      publishedDescriptors()
  );
  Assert.assertEquals(
      new KafkaDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 13L))),
      newDataSchemaMetadata()
  );

  IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData();
  Map<String, Object> expectedMetrics = ImmutableMap.of(
      RowIngestionMeters.BUILD_SEGMENTS,
      ImmutableMap.of(
          RowIngestionMeters.PROCESSED, 4, RowIngestionMeters.PROCESSED_WITH_ERROR, 3,
          RowIngestionMeters.UNPARSEABLE, 3, RowIngestionMeters.THROWN_AWAY, 1
      )
  );
  Assert.assertEquals(expectedMetrics, reportData.getRowStats());

  List<LinkedHashMap> parseExceptionReports =
      (List<LinkedHashMap>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);
  List<String> expectedMessages = Arrays.asList(
      "Unable to parse value[notanumber] for field[met1]",
      "could not convert value [notanumber] to float",
      "could not convert value [notanumber] to long",
      "Unable to parse [] as the intermediateRow resulted in empty input row (Record: 1)",
      "Unable to parse row [unparseable] (Record: 1)",
      "Encountered row with timestamp[246140482-04-24T15:36:27.903Z] that cannot be represented as a long: [{timestamp=246140482-04-24T15:36:27.903Z, dim1=x, dim2=z, dimLong=10, dimFloat=20.0, met1=1.0}] (Record: 1)"
  );
  List<String> actualMessages = parseExceptionReports.stream()
      .map((r) -> ((List<String>) r.get("details")).get(0))
      .collect(Collectors.toList());
  Assert.assertEquals(expectedMessages, actualMessages);

  List<String> expectedInputs = Arrays.asList(
      "{timestamp=2049, dim1=f, dim2=y, dimLong=10, dimFloat=20.0, met1=notanumber}",
      "{timestamp=2049, dim1=f, dim2=y, dimLong=10, dimFloat=notanumber, met1=1.0}",
      "{timestamp=2049, dim1=f, dim2=y, dimLong=notanumber, dimFloat=20.0, met1=1.0}",
      "",
      "unparseable",
      "{timestamp=246140482-04-24T15:36:27.903Z, dim1=x, dim2=z, dimLong=10, dimFloat=20.0, met1=1.0}"
  );
  List<String> actualInputs = parseExceptionReports.stream()
      .map((r) -> (String) r.get("input"))
      .collect(Collectors.toList());
  Assert.assertEquals(expectedInputs, actualInputs);
}
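The IO config above is where SeekableStreamEndSequenceNumbers does its work: the task reads Kafka partition 0 from start offset 2 up to end offset 13, i.e. offsets 2 through 12, which is 11 records and lines up with the metric totals asserted above (4 processed + 3 processed with error + 3 unparseable + 1 thrown away). Below is a minimal standalone sketch of building the same bounds; the topic name is hypothetical, and the getStream()/getPartitionSequenceNumberMap() accessors are assumed from the SeekableStreamSequenceNumbers interface rather than shown in the test itself.

// Minimal sketch (not from the test above): start/end bounds for one Kafka partition.
// Accessor names below are assumptions, as noted in the lead-in.
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers;
import org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers;

public class SequenceBoundsSketch {
  public static void main(String[] args) {
    final String topic = "my-topic";  // hypothetical topic name

    // Start reading partition 0 at offset 2; no partitions are marked as exclusive-start.
    final SeekableStreamStartSequenceNumbers<Integer, Long> start =
        new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 2L), ImmutableSet.of());

    // Stop once partition 0 reaches offset 13.
    final SeekableStreamEndSequenceNumbers<Integer, Long> end =
        new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 13L));

    System.out.println(start.getStream() + " -> " + end.getPartitionSequenceNumberMap());
  }
}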
Use of org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers in project druid by druid-io.
From the class KafkaIndexTaskTest, method testRunAfterDataInsertedLiveReport.
@Test(timeout = 60_000L)
public void testRunAfterDataInsertedLiveReport() throws Exception {
  // Insert data
  insertData();

  final KafkaIndexTask task = createTask(
      null,
      new KafkaIndexTaskIOConfig(
          0, "sequence0",
          new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 2L), ImmutableSet.of()),
          new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 12L)),
          kafkaServer.consumerProperties(), KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS,
          true, null, null, INPUT_FORMAT
      )
  );
  final ListenableFuture<TaskStatus> future = runTask(task);
  SeekableStreamIndexTaskRunner runner = task.getRunner();

  // Wait for the runner to reach the PUBLISHING state
  while (true) {
    Thread.sleep(1000);
    if (runner.getStatus() == Status.PUBLISHING) {
      break;
    }
  }

  // Capture the live row-stats report before resuming the runner
  Map rowStats = runner.doGetRowStats();
  Map totals = (Map) rowStats.get("totals");
  RowIngestionMetersTotals buildSegments = (RowIngestionMetersTotals) totals.get("buildSegments");

  Map movingAverages = (Map) rowStats.get("movingAverages");
  Map buildSegments2 = (Map) movingAverages.get("buildSegments");
  HashMap avg_1min = (HashMap) buildSegments2.get("1m");
  HashMap avg_5min = (HashMap) buildSegments2.get("5m");
  HashMap avg_15min = (HashMap) buildSegments2.get("15m");

  runner.resume();

  // Check metrics
  Assert.assertEquals(buildSegments.getProcessed(), task.getRunner().getRowIngestionMeters().getProcessed());
  Assert.assertEquals(buildSegments.getUnparseable(), task.getRunner().getRowIngestionMeters().getUnparseable());
  Assert.assertEquals(buildSegments.getThrownAway(), task.getRunner().getRowIngestionMeters().getThrownAway());
  Assert.assertEquals(avg_1min.get("processed"), 0.0);
  Assert.assertEquals(avg_5min.get("processed"), 0.0);
  Assert.assertEquals(avg_15min.get("processed"), 0.0);

  // Wait for task to exit
  Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode());
}
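The live report returned by doGetRowStats() in this test is a plain nested map: "totals" maps "buildSegments" to a RowIngestionMetersTotals, and "movingAverages" maps "buildSegments" to one map per window ("1m", "5m", "15m") keyed by metric name such as "processed". The helper below is only an illustration of reading that shape back out; the map keys and the getProcessed() accessor come from the test above, while the helper class itself is not part of Druid and the org.apache.druid.segment.incremental package for RowIngestionMetersTotals is assumed.

// Illustrative only: navigates the row-stats map shape used by the test above.
import java.util.Map;
import org.apache.druid.segment.incremental.RowIngestionMetersTotals;

public final class RowStatsReports {
  private RowStatsReports() {}

  /** Returns the total number of processed rows recorded under buildSegments. */
  @SuppressWarnings("unchecked")
  public static long processedTotal(Map<String, Object> rowStats) {
    final Map<String, Object> totals = (Map<String, Object>) rowStats.get("totals");
    final RowIngestionMetersTotals buildSegments = (RowIngestionMetersTotals) totals.get("buildSegments");
    return buildSegments.getProcessed();
  }

  /** Returns the 1-minute moving average of processed rows, or 0.0 if absent. */
  @SuppressWarnings("unchecked")
  public static double processedOneMinuteAvg(Map<String, Object> rowStats) {
    final Map<String, Object> movingAverages = (Map<String, Object>) rowStats.get("movingAverages");
    final Map<String, Object> buildSegments = (Map<String, Object>) movingAverages.get("buildSegments");
    final Map<String, Object> oneMinute = (Map<String, Object>) buildSegments.get("1m");
    final Object processed = oneMinute == null ? null : oneMinute.get("processed");
    return processed == null ? 0.0 : ((Number) processed).doubleValue();
  }
}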
Use of org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers in project druid by druid-io.
From the class KafkaIndexTaskTest, method testKafkaRecordEntityInputFormat.
@Test(timeout = 60_000L)
public void testKafkaRecordEntityInputFormat() throws Exception {
  // Insert the first three records
  insertData(Iterables.limit(records, 3));

  final KafkaIndexTask task = createTask(
      null,
      new DataSchema(
          "test_ds",
          new TimestampSpec("timestamp", "iso", null),
          new DimensionsSpec(Arrays.asList(
              new StringDimensionSchema("dim1"), new StringDimensionSchema("dim1t"),
              new StringDimensionSchema("dim2"), new LongDimensionSchema("dimLong"),
              new FloatDimensionSchema("dimFloat"), new StringDimensionSchema("kafka.topic"),
              new LongDimensionSchema("kafka.offset"), new StringDimensionSchema("kafka.header.encoding")
          )),
          new AggregatorFactory[] {
              new DoubleSumAggregatorFactory("met1sum", "met1"), new CountAggregatorFactory("rows")
          },
          new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null),
          null
      ),
      new KafkaIndexTaskIOConfig(
          0, "sequence0",
          new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 0L), ImmutableSet.of()),
          new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 5L)),
          kafkaServer.consumerProperties(), KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS,
          true, null, null, new TestKafkaInputFormat(INPUT_FORMAT)
      )
  );
  Assert.assertTrue(task.supportsQueries());
  final ListenableFuture<TaskStatus> future = runTask(task);

  while (countEvents(task) != 3) {
    Thread.sleep(25);
  }
  Assert.assertEquals(Status.READING, task.getRunner().getStatus());

  // Verify the three indexed records and their Kafka metadata dimensions
  final QuerySegmentSpec interval = OBJECT_MAPPER.readValue("\"2008/2012\"", QuerySegmentSpec.class);
  List<ScanResultValue> scanResultValues = scanData(task, interval);
  Assert.assertEquals(3, Iterables.size(scanResultValues));
  int i = 0;
  for (ScanResultValue result : scanResultValues) {
    final Map<String, Object> event = ((List<Map<String, Object>>) result.getEvents()).get(0);
    Assert.assertEquals((long) i++, event.get("kafka.offset"));
    Assert.assertEquals(topic, event.get("kafka.topic"));
    Assert.assertEquals("application/json", event.get("kafka.header.encoding"));
  }

  // Insert remaining data
  insertData(Iterables.skip(records, 3));

  // Wait for task to exit
  Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode());

  // Check metrics
  Assert.assertEquals(4, task.getRunner().getRowIngestionMeters().getProcessed());
  Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getUnparseable());
  Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getThrownAway());
}
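Here the DimensionsSpec registers kafka.topic, kafka.offset, and kafka.header.encoding as ordinary dimensions, and the TestKafkaInputFormat wrapper (a helper defined in this test class) is what feeds them from the consumed record rather than from the JSON payload. The sketch below only shows where such values could come from on a raw kafka-clients ConsumerRecord; the "encoding" header key is an assumption, since the test only shows the resulting dimension value ("application/json"), and the helper class is illustrative rather than part of Druid.

// Illustrative sketch of extracting the kafka.* dimension values seen in the test above.
// ConsumerRecord is the standard kafka-clients API; the header key is assumed.
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.header.Header;

public final class KafkaMetadataSketch {
  private KafkaMetadataSketch() {}

  public static Map<String, Object> metadataFields(ConsumerRecord<byte[], byte[]> record) {
    final Map<String, Object> fields = new HashMap<>();
    fields.put("kafka.topic", record.topic());
    fields.put("kafka.offset", record.offset());

    // Assumed header key; only the resulting dimension name appears in the test.
    final Header encoding = record.headers().lastHeader("encoding");
    if (encoding != null && encoding.value() != null) {
      fields.put("kafka.header.encoding", new String(encoding.value(), StandardCharsets.UTF_8));
    }
    return fields;
  }
}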
Use of org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers in project druid by druid-io.
From the class KafkaIndexTaskTest, method testRunTransactionModeRollback.
@Test(timeout = 60_000L)
public void testRunTransactionModeRollback() throws Exception {
  final KafkaIndexTask task = createTask(
      null,
      new KafkaIndexTaskIOConfig(
          0, "sequence0",
          new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 0L), ImmutableSet.of()),
          new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 13L)),
          kafkaServer.consumerProperties(), KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS,
          true, null, null, INPUT_FORMAT
      )
  );
  final ListenableFuture<TaskStatus> future = runTask(task);

  // Insert 2 records initially
  try (final KafkaProducer<byte[], byte[]> kafkaProducer = kafkaServer.newProducer()) {
    kafkaProducer.initTransactions();
    kafkaProducer.beginTransaction();
    for (ProducerRecord<byte[], byte[]> record : Iterables.limit(records, 2)) {
      kafkaProducer.send(record).get();
    }
    kafkaProducer.commitTransaction();
  }
  while (countEvents(task) != 2) {
    Thread.sleep(25);
  }
  Assert.assertEquals(2, countEvents(task));
  Assert.assertEquals(Status.READING, task.getRunner().getStatus());

  // Verify the 2 indexed records
  final QuerySegmentSpec firstInterval = OBJECT_MAPPER.readValue("\"2008/2010\"", QuerySegmentSpec.class);
  Iterable<ScanResultValue> scanResultValues = scanData(task, firstInterval);
  Assert.assertEquals(2, Iterables.size(scanResultValues));

  // Insert 3 more records and roll the transaction back
  try (final KafkaProducer<byte[], byte[]> kafkaProducer = kafkaServer.newProducer()) {
    kafkaProducer.initTransactions();
    kafkaProducer.beginTransaction();
    for (ProducerRecord<byte[], byte[]> record : Iterables.limit(Iterables.skip(records, 2), 3)) {
      kafkaProducer.send(record).get();
    }
    kafkaProducer.flush();
    kafkaProducer.abortTransaction();
  }
  Assert.assertEquals(2, countEvents(task));
  Assert.assertEquals(Status.READING, task.getRunner().getStatus());

  // Verify that no records were indexed in the rolled-back time period
  final QuerySegmentSpec rollbackedInterval = OBJECT_MAPPER.readValue("\"2010/2012\"", QuerySegmentSpec.class);
  scanResultValues = scanData(task, rollbackedInterval);
  Assert.assertEquals(0, Iterables.size(scanResultValues));

  // Insert remaining data
  try (final KafkaProducer<byte[], byte[]> kafkaProducer = kafkaServer.newProducer()) {
    kafkaProducer.initTransactions();
    kafkaProducer.beginTransaction();
    for (ProducerRecord<byte[], byte[]> record : Iterables.skip(records, 5)) {
      kafkaProducer.send(record).get();
    }
    kafkaProducer.commitTransaction();
  }
  final QuerySegmentSpec endInterval = OBJECT_MAPPER.readValue("\"2008/2049\"", QuerySegmentSpec.class);
  Iterable<ScanResultValue> scanResultValues1 = scanData(task, endInterval);
  Assert.assertEquals(2, Iterables.size(scanResultValues1));

  // Wait for task to exit
  Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode());
  Assert.assertEquals(task.getRunner().getEndOffsets(), task.getRunner().getCurrentOffsets());

  // Check metrics
  Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getProcessed());
  Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getUnparseable());
  Assert.assertEquals(1, task.getRunner().getRowIngestionMeters().getThrownAway());

  // Check published metadata and segments in deep storage
  assertEqualsExceptVersion(
      ImmutableList.of(
          sdd("2008/P1D", 0, ImmutableList.of("a")), sdd("2009/P1D", 0, ImmutableList.of("b")),
          sdd("2013/P1D", 0, ImmutableList.of("f")), sdd("2049/P1D", 0, ImmutableList.of("f"))
      ),
      publishedDescriptors()
  );
  Assert.assertEquals(
      new KafkaDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 13L))),
      newDataSchemaMetadata()
  );
}
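All three producer blocks in this test follow the same transactional pattern from the standard kafka-clients API: initTransactions(), beginTransaction(), send(), then either commitTransaction() or abortTransaction(). The aborted middle batch is what the task never indexes; the true argument in the IO config above appears to be the useTransaction flag, which is what keeps the task reading only committed data. Below is a condensed, standalone sketch of that commit-or-abort pattern; the broker address, transactional id, topic, and payloads are all hypothetical.

// Condensed commit-or-abort sketch using the standard kafka-clients producer API.
// All configuration values and payloads below are hypothetical.
import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

public final class TransactionalProduceSketch {
  public static void main(String[] args) {
    final Properties props = new Properties();
    props.put("bootstrap.servers", "localhost:9092");     // hypothetical broker
    props.put("transactional.id", "sketch-producer-1");   // required for transactional producers
    props.put("key.serializer", StringSerializer.class.getName());
    props.put("value.serializer", StringSerializer.class.getName());

    try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
      producer.initTransactions();
      producer.beginTransaction();
      try {
        producer.send(new ProducerRecord<>("my-topic", "key", "committed-value")).get();
        producer.commitTransaction();   // read_committed consumers see this record
      } catch (Exception e) {
        producer.abortTransaction();    // aborted records stay invisible to read_committed consumers
        throw new RuntimeException(e);
      }
    }
  }
}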