Use of io.mantisrx.connector.kafka.KafkaAckable in project mantis by Netflix.
From the class KafkaSourceTest, method testKafkaSourceSingleConsumerReadsAllMessagesInOrderFromSinglePartition.
@Test
public void testKafkaSourceSingleConsumerReadsAllMessagesInOrderFromSinglePartition() throws InterruptedException {
String testTopic = "testTopic" + topicNum.incrementAndGet();
int numPartitions = 1;
kafkaServer.createTopic(testTopic, numPartitions);
int numMessages = 10;
for (int i = 0; i < numMessages; i++) {
ProducerRecord<String, String> keyedMessage = new ProducerRecord<>(testTopic, "{\"messageNum\":" + i + "}");
kafkaServer.sendMessages(keyedMessage);
}
KafkaSource kafkaSource = new KafkaSource(new NoopRegistry());
Context context = mock(Context.class);
Parameters params = ParameterTestUtils.createParameters(
    KafkaSourceParameters.TOPIC, testTopic,
    KafkaSourceParameters.PREFIX + ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest",
    KafkaSourceParameters.PREFIX + ConsumerConfig.GROUP_ID_CONFIG, "testKafkaConsumer-" + random.nextInt());
when(context.getParameters()).then((Answer<Parameters>) invocation -> params);
when(context.getWorkerInfo()).then((Answer<WorkerInfo>) invocation -> new WorkerInfo("testJobName", "testJobName-1", 1, 0, 1, MantisJobDurationType.Perpetual, "1.1.1.1"));
when(context.getJobId()).then((Answer<String>) invocation -> "testJobName-1");
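// Index(workerIndex, totalWorkers): this worker is index 0 of 10 (the static-assignment test further below confirms the second argument is the total worker count)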
Index index = new Index(0, 10);
Observable<Observable<KafkaAckable>> sourceObs = kafkaSource.call(context, index);
final CountDownLatch latch = new CountDownLatch(numMessages);
final AtomicInteger counter = new AtomicInteger(0);
sourceObs.flatMap(kafkaAckableObs -> kafkaAckableObs).map(kafkaAckable -> {
Optional<Map<String, Object>> parsedEvent = kafkaAckable.getKafkaData().getParsedEvent();
assertTrue(parsedEvent.isPresent());
assertEquals(counter.getAndIncrement(), parsedEvent.get().get("messageNum"));
LOGGER.info("got message on topic {} consumer Id {}", parsedEvent.get(), kafkaAckable.getKafkaData().getMantisKafkaConsumerId());
kafkaAckable.ack();
latch.countDown();
return parsedEvent;
}).subscribe();
assertTrue("timed out waiting to get all messages from Kafka", latch.await(10, TimeUnit.SECONDS));
kafkaServer.deleteTopic(testTopic);
}
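A minimal sketch of the same consumption pattern outside a test, assuming only the KafkaAckable API exercised above (getKafkaData().getParsedEvent() and ack()); the class and the handle(...) method are hypothetical placeholders, not part of the connector:
import java.util.Map;

import io.mantisrx.connector.kafka.KafkaAckable;
import rx.Observable;

// Hypothetical driver, not part of the project: KafkaSource.call(context, index) returns one
// inner observable per Kafka consumer; flatten them, process each parsed event, then ack so
// the consumed offset becomes eligible for the next checkpoint.
final class KafkaAckableConsumerSketch {

    static void consume(Observable<Observable<KafkaAckable>> sourceObs) {
        sourceObs
            .flatMap(inner -> inner)
            .subscribe(ackable -> {
                ackable.getKafkaData().getParsedEvent()
                       .ifPresent(KafkaAckableConsumerSketch::handle);
                ackable.ack();
            });
    }

    private static void handle(Map<String, Object> event) {
        // application-specific processing of the parsed Kafka message goes here
    }
}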
Use of io.mantisrx.connector.kafka.KafkaAckable in project mantis by Netflix.
From the class KafkaSource, method createBackPressuredConsumerObs.
/**
* Create an observable with back pressure semantics from the consumer records fetched using consumer.
*
* @param mantisKafkaConsumer non thread-safe KafkaConsumer
* @param kafkaSourceConfig configuration for the Mantis Kafka Source
*/
private Observable<KafkaAckable> createBackPressuredConsumerObs(final MantisKafkaConsumer<?> mantisKafkaConsumer, final MantisKafkaSourceConfig kafkaSourceConfig) {
CheckpointStrategy checkpointStrategy = mantisKafkaConsumer.getStrategy();
final CheckpointTrigger trigger = mantisKafkaConsumer.getTrigger();
final ConsumerMetrics consumerMetrics = mantisKafkaConsumer.getConsumerMetrics();
final TopicPartitionStateManager partitionStateManager = mantisKafkaConsumer.getPartitionStateManager();
int mantisKafkaConsumerId = mantisKafkaConsumer.getConsumerId();
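// SyncOnSubscribe gives this source pull-based back pressure: the state threaded between calls is the current record iterator, the first lambda seeds it with an initial poll, and the second lambda runs once per downstream request and emits at most one KafkaAckable per call.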
SyncOnSubscribe<Iterator<ConsumerRecord<String, byte[]>>, KafkaAckable> syncOnSubscribe = SyncOnSubscribe.createStateful(() -> {
final ConsumerRecords<String, byte[]> records = mantisKafkaConsumer.poll(kafkaSourceConfig.getConsumerPollTimeoutMs());
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("topic listing for consumer {}", mantisKafkaConsumer.listTopics());
}
LOGGER.info("consumer subscribed to topic-partitions {}", mantisKafkaConsumer.assignment());
return records.iterator();
}, (consumerRecordIterator, observer) -> {
Iterator<ConsumerRecord<String, byte[]>> it = consumerRecordIterator;
final Set<TopicPartition> partitions = mantisKafkaConsumer.assignment();
if (trigger.shouldCheckpoint()) {
long startTime = System.currentTimeMillis();
final Map<TopicPartition, OffsetAndMetadata> checkpoint = partitionStateManager.createCheckpoint(partitions);
checkpointStrategy.persistCheckpoint(checkpoint);
long now = System.currentTimeMillis();
consumerMetrics.recordCheckpointDelay(now - startTime);
consumerMetrics.incrementCommitCount();
trigger.reset();
}
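// 'done' is the source's shutdown flag (set elsewhere in KafkaSource); once it flips, polling stops and the consumer is closed in the else branch below.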
if (!done.get()) {
try {
if (!consumerRecordIterator.hasNext()) {
final ConsumerRecords<String, byte[]> consumerRecords = mantisKafkaConsumer.poll(kafkaSourceConfig.getConsumerPollTimeoutMs());
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("poll returned {} records", consumerRecords.count());
}
it = consumerRecords.iterator();
}
if (it.hasNext()) {
final ConsumerRecord<String, byte[]> m = it.next();
final TopicPartition topicPartition = new TopicPartition(m.topic(), m.partition());
consumerMetrics.incrementInCount();
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("updating read offset to " + m.offset() + " read " + m.value());
}
if (m.value() != null) {
try {
trigger.update(getPayloadSize(m));
if (kafkaSourceConfig.getParseMessageInSource()) {
final Parser parser = ParserType.parser(kafkaSourceConfig.getMessageParserType()).getParser();
if (parser.canParse(m.value())) {
final Map<String, Object> parsedKafkaValue = parser.parseMessage(m.value());
final KafkaData kafkaData = new KafkaData(m, Optional.ofNullable(parsedKafkaValue), Optional.ofNullable(m.key()), mantisKafkaConsumerId);
final KafkaAckable ackable = new KafkaAckable(kafkaData, ackSubject);
// record offset consumed in TopicPartitionStateManager before onNext to avoid race condition with Ack being processed before the consume is recorded
partitionStateManager.recordMessageRead(topicPartition, m.offset());
consumerMetrics.recordReadOffset(topicPartition, m.offset());
observer.onNext(ackable);
} else {
consumerMetrics.incrementParseFailureCount();
}
} else {
final KafkaData kafkaData = new KafkaData(m, Optional.empty(), Optional.ofNullable(m.key()), mantisKafkaConsumerId);
final KafkaAckable ackable = new KafkaAckable(kafkaData, ackSubject);
// record offset consumed in TopicPartitionStateManager before onNext to avoid race condition with Ack being processed before the consume is recorded
partitionStateManager.recordMessageRead(topicPartition, m.offset());
consumerMetrics.recordReadOffset(topicPartition, m.offset());
observer.onNext(ackable);
}
} catch (ParseException pe) {
consumerMetrics.incrementErrorCount();
LOGGER.warn("failed to parse {}:{} message {}", m.topic(), m.partition(), m.value(), pe);
}
} else {
consumerMetrics.incrementKafkaMessageValueNullCount();
}
} else {
consumerMetrics.incrementWaitForDataCount();
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Reached head of partition, waiting for more data");
}
TimeUnit.MILLISECONDS.sleep(200);
}
} catch (TimeoutException toe) {
consumerMetrics.incrementWaitForDataCount();
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Reached head of partition waiting for more data");
}
} catch (OffsetOutOfRangeException oore) {
LOGGER.warn("offsets out of range " + oore.partitions() + " will seek to beginning", oore);
final Set<TopicPartition> topicPartitionSet = oore.partitions();
for (TopicPartition tp : topicPartitionSet) {
LOGGER.info("partition {} consumer position {}", tp, mantisKafkaConsumer.position(tp));
}
mantisKafkaConsumer.seekToBeginning(oore.partitions().toArray(new TopicPartition[oore.partitions().size()]));
} catch (InvalidRecordException ire) {
consumerMetrics.incrementErrorCount();
LOGGER.warn("iterator error with invalid message. message will be dropped " + ire.getMessage());
} catch (KafkaException e) {
consumerMetrics.incrementErrorCount();
LOGGER.warn("Other Kafka exception, message will be dropped. " + e.getMessage());
} catch (InterruptedException ie) {
LOGGER.error("consumer interrupted", ie);
Thread.currentThread().interrupt();
} catch (Exception e) {
consumerMetrics.incrementErrorCount();
LOGGER.warn("caught exception", e);
}
} else {
mantisKafkaConsumer.close();
}
return it;
}, consumerRecordIterator -> {
LOGGER.info("closing Kafka consumer on unsubscribe" + mantisKafkaConsumer.toString());
mantisKafkaConsumer.close();
});
return Observable.create(syncOnSubscribe)
    .subscribeOn(Schedulers.newThread())
    .doOnUnsubscribe(() -> LOGGER.info("consumer {} stopped due to unsubscribe", mantisKafkaConsumerId))
    .doOnError((t) -> {
        LOGGER.error("consumer {} stopped due to error", mantisKafkaConsumerId, t);
        consumerMetrics.incrementErrorCount();
    })
    .doOnTerminate(() -> LOGGER.info("consumer {} terminated", mantisKafkaConsumerId));
}
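For reference, RxJava 1's SyncOnSubscribe.createStateful takes exactly the three pieces used above: a state generator, a next-function that emits at most one item per downstream request and returns the new state, and an unsubscribe action. A self-contained sketch of that pull loop over plain collections (illustrative names, not connector code):
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

import rx.Observable;
import rx.observables.SyncOnSubscribe;

// Illustrative only: the same stateful pull loop as createBackPressuredConsumerObs, but over a
// plain list instead of a KafkaConsumer.
public class SyncOnSubscribeSketch {

    public static void main(String[] args) {
        List<String> records = Arrays.asList("a", "b", "c");
        SyncOnSubscribe<Iterator<String>, String> onSubscribe = SyncOnSubscribe.createStateful(
            records::iterator,                     // generate the initial state on subscribe
            (it, observer) -> {                    // called once per item requested downstream
                if (it.hasNext()) {
                    observer.onNext(it.next());    // at most one onNext per call
                } else {
                    observer.onCompleted();
                }
                return it;                         // state passed to the next call
            },
            it -> { /* release resources here, e.g. close the consumer */ });

        Observable.create(onSubscribe).subscribe(System.out::println);
    }
}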
Use of io.mantisrx.connector.kafka.KafkaAckable in project mantis by Netflix.
From the class KafkaSourceTest, method testKafkaSourceMultipleConsumersReadsAllMessagesFromMultiplePartitions.
@Test
public void testKafkaSourceMultipleConsumersReadsAllMessagesFromMultiplePartitions() throws InterruptedException {
String testTopic = "testTopic" + topicNum.incrementAndGet();
int numPartitions = 2;
kafkaServer.createTopic(testTopic, numPartitions);
int numMessages = 10;
Set<Integer> outstandingMsgs = new ConcurrentSkipListSet<>();
for (int i = 0; i < numMessages; i++) {
ProducerRecord<String, String> keyedMessage = new ProducerRecord<>(testTopic, "{\"messageNum\":" + i + "}");
kafkaServer.sendMessages(keyedMessage);
outstandingMsgs.add(i);
}
KafkaSource kafkaSource = new KafkaSource(new NoopRegistry());
Context context = mock(Context.class);
Parameters params = ParameterTestUtils.createParameters(
    KafkaSourceParameters.NUM_KAFKA_CONSUMER_PER_WORKER, 2,
    KafkaSourceParameters.TOPIC, testTopic,
    KafkaSourceParameters.PREFIX + ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest",
    KafkaSourceParameters.PREFIX + ConsumerConfig.GROUP_ID_CONFIG, "testKafkaConsumer-" + random.nextInt());
when(context.getParameters()).then((Answer<Parameters>) invocation -> params);
when(context.getWorkerInfo()).then((Answer<WorkerInfo>) invocation -> new WorkerInfo("testJobName", "testJobName-1", 1, 0, 1, MantisJobDurationType.Perpetual, "1.1.1.1"));
when(context.getJobId()).then((Answer<String>) invocation -> "testJobName-1");
Index index = new Index(0, 10);
Observable<Observable<KafkaAckable>> sourceObs = kafkaSource.call(context, index);
final CountDownLatch latch = new CountDownLatch(numMessages);
final Map<Integer, Integer> lastMessageNumByConsumerId = new ConcurrentHashMap<>();
sourceObs.flatMap(kafkaAckableObs -> kafkaAckableObs).map(kafkaAckable -> {
Optional<Map<String, Object>> parsedEvent = kafkaAckable.getKafkaData().getParsedEvent();
assertTrue(parsedEvent.isPresent());
Integer messageNum = (Integer) parsedEvent.get().get("messageNum");
assertTrue(outstandingMsgs.contains(messageNum));
outstandingMsgs.remove(messageNum);
int mantisKafkaConsumerId = kafkaAckable.getKafkaData().getMantisKafkaConsumerId();
lastMessageNumByConsumerId.putIfAbsent(mantisKafkaConsumerId, -1);
// assert consumption of higher message numbers across consumer instances
assertTrue(messageNum > lastMessageNumByConsumerId.get(mantisKafkaConsumerId));
lastMessageNumByConsumerId.put(mantisKafkaConsumerId, messageNum);
LOGGER.info("got message on topic {} consumer id {}", parsedEvent.get(), mantisKafkaConsumerId);
kafkaAckable.ack();
latch.countDown();
return parsedEvent;
}).doOnError(t -> {
LOGGER.error("caught unexpected exception", t);
fail("test failed due to unexpected error " + t.getMessage());
}).subscribe();
assertTrue("timed out waiting to get all messages from Kafka", latch.await(10, TimeUnit.SECONDS));
assertEquals(0, outstandingMsgs.size());
assertTrue(lastMessageNumByConsumerId.keySet().size() == 2);
lastMessageNumByConsumerId.keySet().forEach(consumerId -> {
assertTrue(lastMessageNumByConsumerId.get(consumerId) >= 0);
});
kafkaServer.deleteTopic(testTopic);
}
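The messageNum > lastMessageNumByConsumerId.get(...) assertion encodes the invariant this test relies on: each consumer owns a single partition here, so message numbers strictly increase per consumer even though the two consumers interleave. A hypothetical standalone helper with the same logic:
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Hypothetical helper mirroring lastMessageNumByConsumerId in the test above.
final class PerConsumerOrderCheck {

    private final Map<Integer, Integer> lastSeen = new ConcurrentHashMap<>();

    void onMessage(int consumerId, int messageNum) {
        Integer previous = lastSeen.put(consumerId, messageNum);
        if (previous != null && messageNum <= previous) {
            throw new AssertionError("consumer " + consumerId + " saw " + messageNum
                    + " after " + previous);
        }
    }
}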
Use of io.mantisrx.connector.kafka.KafkaAckable in project mantis by Netflix.
From the class KafkaSourceTest, method testKafkaSourceSingleConsumerHandlesMessageParseFailures.
@Test
public void testKafkaSourceSingleConsumerHandlesMessageParseFailures() throws InterruptedException {
String testTopic = "testTopic" + topicNum.incrementAndGet();
int numPartitions = 1;
kafkaServer.createTopic(testTopic, numPartitions);
int numMessages = 10;
for (int i = 0; i < numMessages; i++) {
ProducerRecord<String, String> keyedMessage = new ProducerRecord<>(testTopic, "{\"messageNum\":" + i + "}");
kafkaServer.sendMessages(keyedMessage);
ProducerRecord<String, String> invalidJsonMessage = new ProducerRecord<>(testTopic, "{\"messageNum:" + i + "}");
kafkaServer.sendMessages(invalidJsonMessage);
}
KafkaSource kafkaSource = new KafkaSource(new NoopRegistry());
Context context = mock(Context.class);
Parameters params = ParameterTestUtils.createParameters(
    KafkaSourceParameters.TOPIC, testTopic,
    KafkaSourceParameters.PREFIX + ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest",
    KafkaSourceParameters.PREFIX + ConsumerConfig.GROUP_ID_CONFIG, "testKafkaConsumer-" + random.nextInt());
when(context.getParameters()).then((Answer<Parameters>) invocation -> params);
when(context.getWorkerInfo()).then((Answer<WorkerInfo>) invocation -> new WorkerInfo("testJobName", "testJobName-1", 1, 0, 1, MantisJobDurationType.Perpetual, "1.1.1.1"));
when(context.getJobId()).then((Answer<String>) invocation -> "testJobName-1");
Index index = new Index(0, 10);
Observable<Observable<KafkaAckable>> sourceObs = kafkaSource.call(context, index);
final CountDownLatch latch = new CountDownLatch(numMessages);
final AtomicInteger counter = new AtomicInteger(0);
sourceObs.flatMap(kafkaAckableObs -> kafkaAckableObs).map(kafkaAckable -> {
Optional<Map<String, Object>> parsedEvent = kafkaAckable.getKafkaData().getParsedEvent();
assertTrue(parsedEvent.isPresent());
assertEquals(counter.getAndIncrement(), parsedEvent.get().get("messageNum"));
LOGGER.info("got message on topic {} consumer Id {}", parsedEvent.get(), kafkaAckable.getKafkaData().getMantisKafkaConsumerId());
kafkaAckable.ack();
latch.countDown();
return parsedEvent;
}).subscribe();
assertTrue("timed out waiting to get all messages from Kafka", latch.await(30, TimeUnit.SECONDS));
kafkaServer.deleteTopic(testTopic);
}
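Every second record produced above ({"messageNum:i}) is malformed JSON, so the parser rejects it (either canParse returns false or parseMessage throws) and the source only increments its parse-failure/error counters instead of emitting an ackable; the latch therefore still waits for exactly numMessages well-formed events. A rough illustration with Jackson, which may or may not be the parser type the connector is configured with:
import java.util.Map;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;

// Illustration only; it just shows why {"messageNum:0} cannot be turned into a Map
// while {"messageNum":0} can.
public class ParseFailureSketch {

    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new ObjectMapper();
        Map<?, ?> ok = mapper.readValue("{\"messageNum\":0}", Map.class);
        System.out.println(ok.get("messageNum"));              // 0, deserialized as an Integer
        try {
            mapper.readValue("{\"messageNum:0}", Map.class);   // field name is never terminated
        } catch (JsonProcessingException e) {
            System.out.println("parse failure, record would be dropped: " + e.getMessage());
        }
    }
}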
Use of io.mantisrx.connector.kafka.KafkaAckable in project mantis by Netflix.
From the class KafkaSourceTest, method testKafkaSourceMultipleConsumersStaticPartitionAssignment.
@Test
public void testKafkaSourceMultipleConsumersStaticPartitionAssignment() throws InterruptedException {
String testTopic = "testTopic" + topicNum.incrementAndGet();
int numConsumers = 3;
int numPartitions = 3;
kafkaServer.createTopic(testTopic, numPartitions);
int numMessages = 10;
Set<Integer> outstandingMsgs = new ConcurrentSkipListSet<>();
for (int i = 0; i < numMessages; i++) {
ProducerRecord<String, String> keyedMessage = new ProducerRecord<>(testTopic, "{\"messageNum\":" + i + "}");
kafkaServer.sendMessages(keyedMessage);
outstandingMsgs.add(i);
}
KafkaSource kafkaSource = new KafkaSource(new NoopRegistry());
Context context = mock(Context.class);
Parameters params = ParameterTestUtils.createParameters(
    KafkaSourceParameters.NUM_KAFKA_CONSUMER_PER_WORKER, numConsumers,
    KafkaSourceParameters.TOPIC, testTopic,
    KafkaSourceParameters.ENABLE_STATIC_PARTITION_ASSIGN, true,
    KafkaSourceParameters.TOPIC_PARTITION_COUNTS, testTopic + ":" + numPartitions,
    KafkaSourceParameters.PREFIX + ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest",
    KafkaSourceParameters.PREFIX + ConsumerConfig.GROUP_ID_CONFIG, "testKafkaConsumer-" + random.nextInt());
when(context.getParameters()).then((Answer<Parameters>) invocation -> params);
when(context.getWorkerInfo()).then((Answer<WorkerInfo>) invocation -> new WorkerInfo("testJobName", "testJobName-1", 1, 0, 1, MantisJobDurationType.Perpetual, "1.1.1.1"));
when(context.getJobId()).then((Answer<String>) invocation -> "testJobName-1");
// Force all consumer instances to be created on same JVM by setting total number of workers for this job to 1
int totalNumWorkerForJob = 1;
Index index = new Index(0, totalNumWorkerForJob);
Observable<Observable<KafkaAckable>> sourceObs = kafkaSource.call(context, index);
final CountDownLatch latch = new CountDownLatch(numMessages);
final Map<Integer, Integer> lastMessageNumByConsumerId = new ConcurrentHashMap<>();
sourceObs.flatMap(kafkaAckableObs -> kafkaAckableObs).map(kafkaAckable -> {
Optional<Map<String, Object>> parsedEvent = kafkaAckable.getKafkaData().getParsedEvent();
assertTrue(parsedEvent.isPresent());
Integer messageNum = (Integer) parsedEvent.get().get("messageNum");
assertTrue(outstandingMsgs.contains(messageNum));
outstandingMsgs.remove(messageNum);
int mantisKafkaConsumerId = kafkaAckable.getKafkaData().getMantisKafkaConsumerId();
lastMessageNumByConsumerId.putIfAbsent(mantisKafkaConsumerId, -1);
// assert consumption of higher message numbers across consumer instances
assertTrue(messageNum > lastMessageNumByConsumerId.get(mantisKafkaConsumerId));
lastMessageNumByConsumerId.put(mantisKafkaConsumerId, messageNum);
LOGGER.info("got message on topic {} consumer id {}", parsedEvent.get(), mantisKafkaConsumerId);
kafkaAckable.ack();
latch.countDown();
return parsedEvent;
}).doOnError(t -> {
LOGGER.error("caught unexpected exception", t);
fail("test failed due to unexpected error " + t.getMessage());
}).subscribe();
assertTrue("timed out waiting to get all messages from Kafka", latch.await(10, TimeUnit.SECONDS));
assertEquals(0, outstandingMsgs.size());
assertTrue(lastMessageNumByConsumerId.keySet().size() == numConsumers);
lastMessageNumByConsumerId.keySet().forEach(consumerId -> {
assertTrue(lastMessageNumByConsumerId.get(consumerId) >= 0);
});
kafkaServer.deleteTopic(testTopic);
}
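ENABLE_STATIC_PARTITION_ASSIGN together with TOPIC_PARTITION_COUNTS pins partitions to consumer instances up front instead of relying on group rebalancing, which is why exactly numConsumers consumer ids show up in lastMessageNumByConsumerId. The actual mapping is computed inside the connector; the round-robin idea can be sketched as follows (all names here are illustrative, not the connector's API):
import java.util.ArrayList;
import java.util.List;

// Illustrative round-robin assignment only; the connector derives the real mapping from the
// TOPIC_PARTITION_COUNTS parameter, and none of the names here belong to its API.
final class StaticAssignmentSketch {

    static List<Integer> partitionsFor(int consumerIndex, int totalConsumers, int numPartitions) {
        List<Integer> owned = new ArrayList<>();
        for (int partition = 0; partition < numPartitions; partition++) {
            if (partition % totalConsumers == consumerIndex) {
                owned.add(partition);
            }
        }
        return owned;
    }

    public static void main(String[] args) {
        // 1 worker x 3 consumers per worker over 3 partitions: each consumer is pinned to one partition
        for (int consumer = 0; consumer < 3; consumer++) {
            System.out.println("consumer " + consumer + " -> " + partitionsFor(consumer, 3, 3));
        }
    }
}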