Use of org.wikidata.query.rdf.tool.change.KafkaPoller.Batch in project wikidata-query-rdf by wikimedia.
The class KafkaPollerUnitTest, method changesFromTopics:
@Test
public void changesFromTopics() throws RetryableException {
ConsumerRecords<String, ChangeEvent> rs = makeRecords(
        makeRecord(makeRCEvent(Duration.ofMillis(20), 1, 1, "Q123"), "topictest", Duration.ofMillis(20)),
        makeRecord(makeRCEvent(Duration.ofMillis(21), 2, 1, "Q234"), "othertopic", Duration.ofMillis(21)),
        makeRecord(makeRCEvent(Duration.ofMillis(20), 3, 1, "Q567"), "topictest", Duration.ofMillis(20)));
Batch batch = getBatchFromRecords(rs);
assertThat(batch.changes())
        .hasSize(3)
        .anyMatch(title("Q123"))
        .anyMatch(title("Q234"))
        .anyMatch(title("Q567"));
}
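The title(...) matchers above are helper predicates defined elsewhere in KafkaPollerUnitTest. A minimal sketch of what they might look like, assuming Change exposes entityId() and revision() accessors (an assumption; the helpers' real definitions are not shown in this excerpt):

import java.util.function.Predicate;
import org.wikidata.query.rdf.tool.change.Change;

// Sketch only: match a Change by entity title, usable with AssertJ's anyMatch.
private static Predicate<Change> title(String expectedTitle) {
    return change -> change.entityId().equals(expectedTitle);
}

// Revision-aware variant, mirroring the titleRevision(...) helper used in multiPolls below.
private static Predicate<Change> titleRevision(String expectedTitle, long expectedRevision) {
    return change -> change.entityId().equals(expectedTitle)
            && change.revision() == expectedRevision;
}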
The class KafkaPollerUnitTest, method writeOffsets:
@Test
public void writeOffsets() throws RetryableException {
// No offsets are found in storage, so initial positions fall back to the start
// timestamp; the test focuses on how offsets are written back.
Collection<String> topics = ImmutableList.of("topictest", "othertopic", "thirdtopic");
KafkaOffsetsRepository offsetsRepository = mock(KafkaOffsetsRepository.class);
Map<TopicPartition, List<ConsumerRecord<String, ChangeEvent>>> records = new HashMap<>();
records.put(new TopicPartition("topictest", 0), Arrays.asList(new ConsumerRecord<>("topictest", 0, 2L, "1", newChange("Q1")), new ConsumerRecord<>("topictest", 0, 2L, "4", newChange("Q4"))));
records.put(new TopicPartition("othertopic", 0), Arrays.asList(new ConsumerRecord<>("othertopic", 0, 2L, "2", newChange("Q2")), new ConsumerRecord<>("othertopic", 0, 3L, "5", newChange("Q5"))));
records.put(new TopicPartition("thirdtopic", 0), singletonList(new ConsumerRecord<>("thirdtopic", 0, 2L, "3", newChange("Q3"))));
createTopicPartitions(1);
when(offsetsRepository.load(any())).thenReturn(ImmutableMap.of());
when(consumer.poll(anyLong())).thenReturn(new ConsumerRecords<>(records));
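// Capture every offsets map the poller hands to storage and to Kafka's async and sync commits.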
ArgumentCaptor<Map<TopicPartition, OffsetAndMetadata>> storeCaptor = ArgumentCaptor.forClass((Class) Map.class);
ArgumentCaptor<Map<TopicPartition, OffsetAndMetadata>> kafkaAsyncStoreCaptor = ArgumentCaptor.forClass((Class) Map.class);
ArgumentCaptor<Map<TopicPartition, OffsetAndMetadata>> kafkaSyncStoreCaptor = ArgumentCaptor.forClass((Class) Map.class);
doNothing().when(offsetsRepository).store(storeCaptor.capture());
doNothing().when(consumer).commitAsync(kafkaAsyncStoreCaptor.capture(), any());
doNothing().when(consumer).commitSync(kafkaSyncStoreCaptor.capture());
KafkaPoller poller = new KafkaPoller(consumer, uris, START_TIME, BATCH_SIZE, topics, offsetsRepository, true, new MetricRegistry());
Batch batch = poller.firstBatch();
poller.done(batch);
// Should be one update query
verify(offsetsRepository, times(1)).store(any());
assertThat(storeCaptor.getValue())
        .containsEntry(new TopicPartition("topictest", 0), new OffsetAndMetadata(2L))
        .containsEntry(new TopicPartition("othertopic", 0), new OffsetAndMetadata(3L))
        .containsEntry(new TopicPartition("thirdtopic", 0), new OffsetAndMetadata(2L));
poller.nextBatch(batch);
assertThat(kafkaAsyncStoreCaptor.getValue())
        .containsEntry(new TopicPartition("topictest", 0), new OffsetAndMetadata(2L))
        .containsEntry(new TopicPartition("othertopic", 0), new OffsetAndMetadata(3L))
        .containsEntry(new TopicPartition("thirdtopic", 0), new OffsetAndMetadata(2L));
poller.done(batch);
// Verify that the last offsets are committed synchronously when closing
poller.close();
assertThat(kafkaSyncStoreCaptor.getValue())
        .containsEntry(new TopicPartition("topictest", 0), new OffsetAndMetadata(2L))
        .containsEntry(new TopicPartition("othertopic", 0), new OffsetAndMetadata(3L))
        .containsEntry(new TopicPartition("thirdtopic", 0), new OffsetAndMetadata(2L));
}
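All three assertions above expect the same mapping: each TopicPartition paired with the highest offset observed for it in the poll. For reference, here is a hedged sketch of deriving such a map from a ConsumerRecords batch; the helper name and merge logic are illustrative, not KafkaPoller's actual code:

import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;

// Illustrative only: collect the maximum offset observed per partition in one batch.
private static Map<TopicPartition, OffsetAndMetadata> maxOffsetsPerPartition(
        ConsumerRecords<String, ChangeEvent> records) {
    Map<TopicPartition, OffsetAndMetadata> offsets = new HashMap<>();
    for (ConsumerRecord<String, ChangeEvent> record : records) {
        offsets.merge(
                new TopicPartition(record.topic(), record.partition()),
                new OffsetAndMetadata(record.offset()),
                (current, candidate) -> candidate.offset() > current.offset() ? candidate : current);
    }
    return offsets;
}

Applied to the records above this yields topictest → 2, othertopic → 3, thirdtopic → 2, which is exactly what the three captors assert.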
The class KafkaPollerUnitTest, method multiPolls:
@Test
public void multiPolls() throws RetryableException {
KafkaPoller poller = makePoller();
ConsumerRecords<String, ChangeEvent> rs1 = makeRecords(
        makeRecord(makeRCEvent(Duration.ofMillis(20), 1, 5, "Q123"), "topictest", Duration.ofMillis(20)),
        makeRecord(makeRCEvent(Duration.ofMillis(30), 2, 2, "Q666", 1, DOMAIN), "othertopic", Duration.ofMillis(21)),
        makeRecord(makeRCEvent(Duration.ofMillis(25), 3, 10, "Q6666", 0, "acme.wrong"), "topictest", Duration.ofMillis(20)));
ConsumerRecords<String, ChangeEvent> rs2 = makeRecords(
        makeRecord(makeRCEvent(Duration.ofMillis(30), 2, 2, "Q666", 1, DOMAIN), "othertopic", Duration.ofMillis(21)),
        makeRecord(makeRCEvent(Duration.ofMillis(25), 3, 10, "Q6666", 0, "acme.wrong"), "topictest", Duration.ofMillis(20)));
ConsumerRecords<String, ChangeEvent> rs3 = makeRecords(
        makeRecord(makeRCEvent(Duration.ofMillis(30), 4, 2, "Q234"), "othertopic", Duration.ofMillis(21)),
        makeRecord(makeRCEvent(Duration.ofMillis(25), 3, 10, "Q6666", 0, "acme.wrong"), "topictest", Duration.ofMillis(20)),
        makeRecord(makeRCEvent(Duration.ofMillis(30), 1, 10, "Q123"), "othertopic", Duration.ofMillis(31)),
        makeRecord(makeRCEvent(Duration.ofMillis(30), 5, 21, "Q245"), "topictest", Duration.ofMillis(40)));
when(consumer.poll(anyLong())).thenReturn(rs1, rs2, rs3, EMPTY_CHANGES);
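// Successive polls return rs1, rs2, rs3, then an empty result.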
Batch batch = poller.firstBatch();
// The second poll yields no good messages, so the fetch loop stops and the
// poller returns the first batch before consuming the third poll.
assertThat(batch.changes()).hasSize(1).anyMatch(titleRevision("Q123", 5));
batch = poller.nextBatch(batch);
assertThat(batch.changes())
        .hasSize(3)
        .anyMatch(titleRevision("Q123", 10))
        .anyMatch(title("Q234"))
        .anyMatch(title("Q245"));
}
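multiPolls depends on the poller discarding events with a foreign domain ("acme.wrong") or a non-entity namespace, and on duplicate events being collapsed. A hedged sketch of such an acceptance check, assuming ChangeEvent exposes domain() and namespace() accessors (an assumption; the real KafkaPoller filter may differ):

// Illustrative only: accept an event when it belongs to our wiki's domain
// and to the entity namespace (namespace 0 stands in for the configured set).
private static boolean accepted(ChangeEvent event, String expectedDomain) {
    return expectedDomain.equals(event.domain()) && event.namespace() == 0;
}

Under a check like this, Q6666 (domain "acme.wrong") and Q666 (namespace 1) never reach a batch, which is why the first batch above contains only Q123.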