Example 11 with Batch

Use of org.wikidata.query.rdf.tool.change.KafkaPoller.Batch in project wikidata-query-rdf by wikimedia.

From the class KafkaPollerUnitTest, the method changesFromTopics:

@Test
public void changesFromTopics() throws RetryableException {
    ConsumerRecords<String, ChangeEvent> rs = makeRecords(
            makeRecord(makeRCEvent(Duration.ofMillis(20), 1, 1, "Q123"), "topictest", Duration.ofMillis(20)),
            makeRecord(makeRCEvent(Duration.ofMillis(21), 2, 1, "Q234"), "othertopic", Duration.ofMillis(21)),
            makeRecord(makeRCEvent(Duration.ofMillis(20), 3, 1, "Q567"), "topictest", Duration.ofMillis(20)));
    Batch batch = getBatchFromRecords(rs);
    assertThat(batch.changes())
            .hasSize(3)
            .anyMatch(title("Q123"))
            .anyMatch(title("Q234"))
            .anyMatch(title("Q567"));
}
Also used: ChangeEvent (org.wikidata.query.rdf.tool.change.events.ChangeEvent), Batch (org.wikidata.query.rdf.tool.change.KafkaPoller.Batch), Test (org.junit.Test)
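The title(...) matchers above are small test-local predicates over the Change objects in a batch. A minimal sketch of such a helper, assuming Change exposes its entity ID via entityId() (the accessor name is an assumption; check the actual fixture in KafkaPollerUnitTest before reusing):

import java.util.function.Predicate;

import org.wikidata.query.rdf.tool.change.Change;

// Hypothetical test helper: true when a Change refers to the given entity title.
// Assumes Change.entityId() returns the "Q..." identifier.
private static Predicate<Change> title(String expected) {
    return change -> expected.equals(change.entityId());
}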

Example 12 with Batch

Use of org.wikidata.query.rdf.tool.change.KafkaPoller.Batch in project wikidata-query-rdf by wikimedia.

From the class KafkaPollerUnitTest, the method writeOffsets:

@Test
public void writeOffsets() throws RetryableException {
    // Scenario where the offsets repository has nothing stored, so all initial
    // offsets are resolved from the start timestamp
    Collection<String> topics = ImmutableList.of("topictest", "othertopic", "thirdtopic");
    KafkaOffsetsRepository offsetsRepository = mock(KafkaOffsetsRepository.class);
    Map<TopicPartition, List<ConsumerRecord<String, ChangeEvent>>> records = new HashMap<>();
    records.put(new TopicPartition("topictest", 0), Arrays.asList(
            new ConsumerRecord<>("topictest", 0, 2L, "1", newChange("Q1")),
            new ConsumerRecord<>("topictest", 0, 2L, "4", newChange("Q4"))));
    records.put(new TopicPartition("othertopic", 0), Arrays.asList(
            new ConsumerRecord<>("othertopic", 0, 2L, "2", newChange("Q2")),
            new ConsumerRecord<>("othertopic", 0, 3L, "5", newChange("Q5"))));
    records.put(new TopicPartition("thirdtopic", 0), singletonList(
            new ConsumerRecord<>("thirdtopic", 0, 2L, "3", newChange("Q3"))));
    createTopicPartitions(1);
    when(offsetsRepository.load(any())).thenReturn(ImmutableMap.of());
    when(consumer.poll(anyLong())).thenReturn(new ConsumerRecords<>(records));
    ArgumentCaptor<Map<TopicPartition, OffsetAndMetadata>> storeCaptor = ArgumentCaptor.forClass((Class) Map.class);
    ArgumentCaptor<Map<TopicPartition, OffsetAndMetadata>> kafkaAsyncStoreCaptor = ArgumentCaptor.forClass((Class) Map.class);
    ArgumentCaptor<Map<TopicPartition, OffsetAndMetadata>> kafkaSyncStoreCaptor = ArgumentCaptor.forClass((Class) Map.class);
    doNothing().when(offsetsRepository).store(storeCaptor.capture());
    doNothing().when(consumer).commitAsync(kafkaAsyncStoreCaptor.capture(), any());
    doNothing().when(consumer).commitSync(kafkaSyncStoreCaptor.capture());
    KafkaPoller poller = new KafkaPoller(consumer, uris, START_TIME, BATCH_SIZE, topics, offsetsRepository, true, new MetricRegistry());
    Batch batch = poller.firstBatch();
    poller.done(batch);
    // The offsets should be stored exactly once (a single update query)
    verify(offsetsRepository, times(1)).store(any());
    assertThat(storeCaptor.getValue())
            .containsEntry(new TopicPartition("topictest", 0), new OffsetAndMetadata(2L))
            .containsEntry(new TopicPartition("othertopic", 0), new OffsetAndMetadata(3L))
            .containsEntry(new TopicPartition("thirdtopic", 0), new OffsetAndMetadata(2L));
    poller.nextBatch(batch);
    assertThat(kafkaAsyncStoreCaptor.getValue())
            .containsEntry(new TopicPartition("topictest", 0), new OffsetAndMetadata(2L))
            .containsEntry(new TopicPartition("othertopic", 0), new OffsetAndMetadata(3L))
            .containsEntry(new TopicPartition("thirdtopic", 0), new OffsetAndMetadata(2L));
    poller.done(batch);
    // Verify that the last offsets are committed synchronously when closing
    poller.close();
    assertThat(kafkaSyncStoreCaptor.getValue())
            .containsEntry(new TopicPartition("topictest", 0), new OffsetAndMetadata(2L))
            .containsEntry(new TopicPartition("othertopic", 0), new OffsetAndMetadata(3L))
            .containsEntry(new TopicPartition("thirdtopic", 0), new OffsetAndMetadata(2L));
}
Also used: HashMap (java.util.HashMap), MetricRegistry (com.codahale.metrics.MetricRegistry), ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord), ChangeEvent (org.wikidata.query.rdf.tool.change.events.ChangeEvent), Batch (org.wikidata.query.rdf.tool.change.KafkaPoller.Batch), TopicPartition (org.apache.kafka.common.TopicPartition), OffsetAndMetadata (org.apache.kafka.clients.consumer.OffsetAndMetadata), Collections.singletonList (java.util.Collections.singletonList), List (java.util.List), ArrayList (java.util.ArrayList), ImmutableList (com.google.common.collect.ImmutableList), Collectors.toList (java.util.stream.Collectors.toList), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), Test (org.junit.Test)
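All three captors are checked against the same bookkeeping: for each partition, the highest offset consumed in the batch. Note that the expected values are the raw last offsets (3L for othertopic, 2L for the others), not the common Kafka convention of committing lastOffset + 1. A sketch of that reduction, independent of the poller's actual implementation:

import java.util.HashMap;
import java.util.Map;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;

// Illustration only, not KafkaPoller's code: collapse a poll result into the
// per-partition offsets the assertions above expect, by tracking the maximum
// offset seen per partition.
static <K, V> Map<TopicPartition, OffsetAndMetadata> batchEndOffsets(ConsumerRecords<K, V> records) {
    Map<TopicPartition, OffsetAndMetadata> offsets = new HashMap<>();
    for (ConsumerRecord<K, V> record : records) {
        TopicPartition partition = new TopicPartition(record.topic(), record.partition());
        OffsetAndMetadata seen = offsets.get(partition);
        if (seen == null || record.offset() > seen.offset()) {
            offsets.put(partition, new OffsetAndMetadata(record.offset()));
        }
    }
    return offsets;
}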

Example 13 with Batch

Use of org.wikidata.query.rdf.tool.change.KafkaPoller.Batch in project wikidata-query-rdf by wikimedia.

From the class KafkaPollerUnitTest, the method multiPolls:

@Test
public void multiPolls() throws RetryableException {
    KafkaPoller poller = makePoller();
    ConsumerRecords<String, ChangeEvent> rs1 = makeRecords(
            makeRecord(makeRCEvent(Duration.ofMillis(20), 1, 5, "Q123"), "topictest", Duration.ofMillis(20)),
            makeRecord(makeRCEvent(Duration.ofMillis(30), 2, 2, "Q666", 1, DOMAIN), "othertopic", Duration.ofMillis(21)),
            makeRecord(makeRCEvent(Duration.ofMillis(25), 3, 10, "Q6666", 0, "acme.wrong"), "topictest", Duration.ofMillis(20)));
    ConsumerRecords<String, ChangeEvent> rs2 = makeRecords(
            makeRecord(makeRCEvent(Duration.ofMillis(30), 2, 2, "Q666", 1, DOMAIN), "othertopic", Duration.ofMillis(21)),
            makeRecord(makeRCEvent(Duration.ofMillis(25), 3, 10, "Q6666", 0, "acme.wrong"), "topictest", Duration.ofMillis(20)));
    ConsumerRecords<String, ChangeEvent> rs3 = makeRecords(
            makeRecord(makeRCEvent(Duration.ofMillis(30), 4, 2, "Q234"), "othertopic", Duration.ofMillis(21)),
            makeRecord(makeRCEvent(Duration.ofMillis(25), 3, 10, "Q6666", 0, "acme.wrong"), "topictest", Duration.ofMillis(20)),
            makeRecord(makeRCEvent(Duration.ofMillis(30), 1, 10, "Q123"), "othertopic", Duration.ofMillis(31)),
            makeRecord(makeRCEvent(Duration.ofMillis(30), 5, 21, "Q245"), "topictest", Duration.ofMillis(40)));
    when(consumer.poll(anyLong())).thenReturn(rs1, rs2, rs3, EMPTY_CHANGES);
    Batch batch = poller.firstBatch();
    // the second poll yields no good messages, so the poller returns the first
    // batch before consuming the third set of records
    assertThat(batch.changes()).hasSize(1).anyMatch(titleRevision("Q123", 5));
    batch = poller.nextBatch(batch);
    assertThat(batch.changes())
            .hasSize(3)
            .anyMatch(titleRevision("Q123", 10))
            .anyMatch(title("Q234"))
            .anyMatch(title("Q245"));
}
Also used: ChangeEvent (org.wikidata.query.rdf.tool.change.events.ChangeEvent), Batch (org.wikidata.query.rdf.tool.change.KafkaPoller.Batch), Test (org.junit.Test)
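Across these polls, events on the wrong domain ("acme.wrong") or outside the entity namespace (Q666 arrives with namespace 1) never reach a batch. A hedged sketch of the kind of predicate involved, assuming ChangeEvent exposes domain() and namespace() accessors (the real KafkaPoller validates events against its configured Uris):

import org.wikidata.query.rdf.tool.change.events.ChangeEvent;

// Illustrative filter, not the poller's exact code: accept only events from the
// expected wiki domain and from the main entity namespace (0 in these fixtures).
private static boolean isAcceptedEvent(ChangeEvent event, String expectedDomain) {
    return expectedDomain.equals(event.domain()) && event.namespace() == 0;
}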

Aggregations

Test (org.junit.Test): 13
Batch (org.wikidata.query.rdf.tool.change.KafkaPoller.Batch): 13
ChangeEvent (org.wikidata.query.rdf.tool.change.events.ChangeEvent): 12
MetricRegistry (com.codahale.metrics.MetricRegistry): 4
ImmutableList (com.google.common.collect.ImmutableList): 4
ImmutableMap (com.google.common.collect.ImmutableMap): 4
ArrayList (java.util.ArrayList): 4
Collections.singletonList (java.util.Collections.singletonList): 4
HashMap (java.util.HashMap): 4
List (java.util.List): 4
Map (java.util.Map): 4
Collectors.toList (java.util.stream.Collectors.toList): 4
ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord): 4
OffsetAndMetadata (org.apache.kafka.clients.consumer.OffsetAndMetadata): 4
TopicPartition (org.apache.kafka.common.TopicPartition): 4
Maps (com.google.common.collect.Maps): 3
URI (java.net.URI): 3
URISyntaxException (java.net.URISyntaxException): 3
Duration (java.time.Duration): 3
Arrays (java.util.Arrays): 3