Use of org.wikidata.query.rdf.tool.change.KafkaPoller.Batch in project wikidata-query-rdf by wikimedia.
In class KafkaPollerUnitTest, the method storedOffsetsFromBoth:
@Test
public void storedOffsetsFromBoth() throws RetryableException {
    // Scenario where offsets are loaded both from storage and from timestamps
    Collection<String> topics = ImmutableList.of("topictest", "othertopic", "thirdtopic");
    KafkaOffsetsRepository offsetsRepository = mock(KafkaOffsetsRepository.class);
    createTopicPartitions(1);
    // Capture args for assign
    ArgumentCaptor<Collection<TopicPartition>> assignArgs = ArgumentCaptor.forClass((Class) Collection.class);
    doNothing().when(consumer).assign(assignArgs.capture());
    // Capture args for seek
    ArgumentCaptor<TopicPartition> seekTopics = ArgumentCaptor.forClass(TopicPartition.class);
    ArgumentCaptor<Long> seekOffsets = ArgumentCaptor.forClass(Long.class);
    doNothing().when(consumer).seek(seekTopics.capture(), seekOffsets.capture());
    // Stored offsets: only "topictest" and "othertopic" have persisted offsets
    Map<TopicPartition, OffsetAndTimestamp> offsetMap = ImmutableMap.of(
            new TopicPartition("topictest", 0), new OffsetAndTimestamp(1, START_TIME.toEpochMilli()),
            new TopicPartition("othertopic", 0), new OffsetAndTimestamp(3, START_TIME.toEpochMilli()));
    when(offsetsRepository.load(any())).thenReturn(offsetMap);
    // Timestamp-driven offsets for the topic without stored offsets
    when(consumer.offsetsForTimes(any())).thenAnswer(i -> {
        Map<TopicPartition, Long> map = i.getArgumentAt(0, Map.class);
        // Check that the timestamps are OK
        map.forEach((k, v) -> assertThat(v).isEqualTo(START_TIME.toEpochMilli()));
        // All timestamp-resolved offsets are 500
        return map.entrySet().stream().collect(Collectors.toMap(Entry::getKey, l -> new OffsetAndTimestamp(500L, l.getValue())));
    });
    when(consumer.poll(anyLong())).thenReturn(EMPTY_CHANGES);
    KafkaPoller poller = new KafkaPoller(consumer, uris, START_TIME, BATCH_SIZE, topics, offsetsRepository, false, new MetricRegistry());
    Batch batch = poller.firstBatch();
    // offsetsForTimes is called exactly once, for the topic that has no stored offset
    verify(consumer, times(1)).offsetsForTimes(any());
    // We assign to 3 partitions - 3 topics x 1 partition
    verify(consumer, times(1)).assign(any());
    assertThat(assignArgs.getValue()).hasSize(topics.size());
    assertThat(seekOffsets.getAllValues())
            .hasSize(topics.size())
            // This offset comes from the timestamp lookup
            .contains(500L);
}
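The tests on this page rely on a createTopicPartitions helper that is not shown here. A minimal sketch of what it could look like, assuming the test class keeps the mocked consumer in a field and stubs partitionsFor for each topic name used across these tests (the topic list and the helper body are illustrative, not the project's actual code):

private void createTopicPartitions(int count) {
    // Hypothetical helper: every known test topic reports `count` partitions.
    // Stubbing extra topics is harmless - the poller only asks for the topics it was given.
    for (String topic : ImmutableList.of("topictest", "othertopic", "thirdtopic")) {
        List<PartitionInfo> partitions = new ArrayList<>();
        for (int p = 0; p < count; p++) {
            // Leader/replica nodes are irrelevant for these tests, so they are left null
            partitions.add(new PartitionInfo(topic, p, null, null, null));
        }
        when(consumer.partitionsFor(topic)).thenReturn(partitions);
    }
}

The two-argument variant used in topicSubscribe further down would additionally capture the topic names passed to partitionsFor.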
Use of org.wikidata.query.rdf.tool.change.KafkaPoller.Batch in project wikidata-query-rdf by wikimedia.
In class KafkaPollerUnitTest, the method noChanges:
@Test
public void noChanges() throws RetryableException {
    KafkaPoller poller = makePoller();
    when(consumer.poll(anyLong())).thenReturn(EMPTY_CHANGES);
    Batch batch = poller.firstBatch();
    assertThat(batch.changes()).hasSize(0);
    assertThat(batch.hasAnyChanges()).isFalse();
}
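makePoller() is another helper that is not shown on this page. Based on the KafkaPoller constructor calls visible in the other tests, a plausible sketch, assuming a single test topic and the DummyKafkaOffsetsRepository referenced further down (both are assumptions for illustration):

private KafkaPoller makePoller() {
    // Hypothetical helper: one topic with one partition and no stored offsets
    Collection<String> topics = ImmutableList.of("topictest");
    createTopicPartitions(1);
    return new KafkaPoller(consumer, uris, START_TIME, BATCH_SIZE, topics,
            new DummyKafkaOffsetsRepository(), true, new MetricRegistry());
}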
Use of org.wikidata.query.rdf.tool.change.KafkaPoller.Batch in project wikidata-query-rdf by wikimedia.
In class KafkaPollerUnitTest, the method topicSubscribe:
@Test
public void topicSubscribe() throws RetryableException {
    Collection<String> topics = ImmutableList.of("topictest", "othertopic");
    // Each topic gets 2 partitions
    ArgumentCaptor<String> partitionArgs = ArgumentCaptor.forClass(String.class);
    createTopicPartitions(2, partitionArgs);
    // Capture args for assign
    ArgumentCaptor<Collection<TopicPartition>> assignArgs = ArgumentCaptor.forClass((Class) Collection.class);
    doNothing().when(consumer).assign(assignArgs.capture());
    when(consumer.offsetsForTimes(any())).thenAnswer(i -> {
        Map<TopicPartition, Long> map = i.getArgumentAt(0, Map.class);
        // Check that the timestamps are OK
        map.forEach((k, v) -> assertThat(v).isEqualTo(START_TIME.toEpochMilli()));
        Map<TopicPartition, OffsetAndTimestamp> out = Maps.newHashMapWithExpectedSize(map.size());
        // Return offset 1000 for partition 0 and nothing for partition 1.
        // Using forEach here because collect() can't handle null values.
        map.forEach((k, v) -> out.put(k, k.partition() == 0 ? new OffsetAndTimestamp(1000, v) : null));
        return out;
    });
    // Capture args for seek
    ArgumentCaptor<TopicPartition> seekArgs = ArgumentCaptor.forClass(TopicPartition.class);
    doNothing().when(consumer).seek(seekArgs.capture(), eq(1000L));
    // Capture args for seekToEnd (partitions with no timestamp-resolved offset)
    ArgumentCaptor<Collection<TopicPartition>> seekBeginningArgs = ArgumentCaptor.forClass((Class) Collection.class);
    doNothing().when(consumer).seekToEnd(seekBeginningArgs.capture());
    when(consumer.poll(anyLong())).thenReturn(EMPTY_CHANGES);
    KafkaPoller poller = new KafkaPoller(consumer, uris, START_TIME, BATCH_SIZE, topics,
            new DummyKafkaOffsetsRepository(), true, new MetricRegistry());
    Batch batch = poller.firstBatch();
    // We get partitions for both topics
    verify(consumer, times(2)).partitionsFor(any());
    assertThat(partitionArgs.getAllValues()).contains("topictest", "othertopic");
    // We assign to 4 partitions - 2 topics x 2 partitions
    verify(consumer, times(1)).assign(any());
    assertThat(assignArgs.getValue()).hasSize(4);
    // Calling seek on both topics, partition 0
    verify(consumer, times(2)).seek(any(), anyLong());
    assertThat(seekArgs.getAllValues()).extracting(topicPartition -> topicPartition.topic()).contains("topictest", "othertopic");
    assertThat(seekArgs.getAllValues()).extracting(tp -> tp.partition()).hasSize(2).containsOnly(0);
    // Calling seekToEnd on both topics, partition 1
    verify(consumer, times(2)).seekToEnd(any());
    Collection<String> sbTopics = seekBeginningArgs.getAllValues().stream()
            .flatMap(c -> c.stream())
            .map(tp -> tp.topic())
            .collect(toList());
    assertThat(sbTopics).hasSize(2).contains("topictest", "othertopic");
    Collection<Integer> sbPartitions = seekBeginningArgs.getAllValues().stream()
            .flatMap(c -> c.stream())
            .map(tp -> tp.partition())
            .distinct()
            .collect(toList());
    assertThat(sbPartitions).hasSize(1).contains(1);
    verify(consumer, times(1)).offsetsForTimes(any());
}
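The DummyKafkaOffsetsRepository used above is also not shown. Conceptually it is an offsets repository with nothing stored, which forces firstBatch() to resolve start positions through consumer.offsetsForTimes(). A sketch under that assumption; only load() is exercised by these tests, and the store() signature shown here is a guess rather than the project's actual interface:

private static class DummyKafkaOffsetsRepository implements KafkaOffsetsRepository {
    @Override
    public Map<TopicPartition, OffsetAndTimestamp> load(Instant startTime) {
        // Nothing persisted - the poller falls back to timestamp-based offsets
        return ImmutableMap.of();
    }

    @Override
    public void store(Map<TopicPartition, OffsetAndMetadata> offsets) {
        // Intentionally a no-op in tests
    }
}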
Use of org.wikidata.query.rdf.tool.change.KafkaPoller.Batch in project wikidata-query-rdf by wikimedia.
In class KafkaPollerUnitTest, the method multiPolls2:
@Test
public void multiPolls2() throws RetryableException {
    KafkaPoller poller = makePoller();
    ConsumerRecords<String, ChangeEvent> rs1 = makeRecords(
            makeRecord(makeRCEvent(Duration.ofMillis(20), 1, 5, "Q123"), "topictest", Duration.ofMillis(20)),
            makeRecord(makeRCEvent(Duration.ofMillis(30), 2, 2, "Q666", 1, DOMAIN), "othertopic", Duration.ofMillis(21)),
            makeRecord(makeRCEvent(Duration.ofMillis(25), 3, 10, "Q6666", 0, "acme.wrong"), "topictest", Duration.ofMillis(20)));
    ConsumerRecords<String, ChangeEvent> rs2 = makeRecords(
            makeRecord(makeRCEvent(Duration.ofMillis(30), 4, 1, "Q234"), "othertopic", Duration.ofMillis(21)),
            makeRecord(makeRCEvent(Duration.ofMillis(30), 2, 2, "Q666", 1, DOMAIN), "othertopic", Duration.ofMillis(21)),
            makeRecord(makeRCEvent(Duration.ofMillis(25), 3, 10, "Q6666", 0, "acme.wrong"), "topictest", Duration.ofMillis(20)));
    ConsumerRecords<String, ChangeEvent> rs3 = makeRecords(
            makeRecord(makeRCEvent(Duration.ofMillis(30), 4, 2, "Q234"), "othertopic", Duration.ofMillis(21)),
            makeRecord(makeRCEvent(Duration.ofMillis(25), 3, 10, "Q6666", 0, "acme.wrong"), "topictest", Duration.ofMillis(20)),
            makeRecord(makeRCEvent(Duration.ofMillis(30), 1, 10, "Q123"), "othertopic", Duration.ofMillis(31)),
            makeRecord(makeRCEvent(Duration.ofMillis(30), 5, 21, "Q245"), "topictest", Duration.ofMillis(40)));
    when(consumer.poll(anyLong())).thenReturn(rs1, rs2, rs3, EMPTY_CHANGES);
    Batch batch = poller.firstBatch();
    // All three polls contained good events, so the batch should hold the latest revision for each title
    assertThat(batch.changes())
            .hasSize(3)
            .anyMatch(titleRevision("Q123", 10))
            .anyMatch(titleRevision("Q234", 2))
            .anyMatch(titleRevision("Q245", 21));
}
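titleRevision(title, revision) is a small matcher helper used only in the assertion above. A hedged sketch of what it might look like; the Change accessor names (entityId(), revision()) are assumptions about the project's Change class:

private Predicate<Change> titleRevision(String title, long revision) {
    // Matches a change for the given entity title at the given revision
    return change -> change.entityId().equals(title) && change.revision() == revision;
}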
Use of org.wikidata.query.rdf.tool.change.KafkaPoller.Batch in project wikidata-query-rdf by wikimedia.
In class KafkaPollerUnitTest, the method storedOffsetsFromStorage:
@Test
public void storedOffsetsFromStorage() throws RetryableException {
    // Scenario where all offsets are loaded from storage
    Collection<String> topics = ImmutableList.of("topictest", "othertopic");
    KafkaOffsetsRepository offsetsRepository = mock(KafkaOffsetsRepository.class);
    createTopicPartitions(2);
    // Capture args for assign
    ArgumentCaptor<Collection<TopicPartition>> assignArgs = ArgumentCaptor.forClass((Class) Collection.class);
    doNothing().when(consumer).assign(assignArgs.capture());
    // Capture args for seek
    ArgumentCaptor<TopicPartition> seekTopics = ArgumentCaptor.forClass(TopicPartition.class);
    ArgumentCaptor<Long> seekOffsets = ArgumentCaptor.forClass(Long.class);
    doNothing().when(consumer).seek(seekTopics.capture(), seekOffsets.capture());
    // Stored offsets for every partition of both topics
    Map<TopicPartition, OffsetAndTimestamp> offsetMap = ImmutableMap.of(
            new TopicPartition("topictest", 0), new OffsetAndTimestamp(1, START_TIME.toEpochMilli()),
            new TopicPartition("topictest", 1), new OffsetAndTimestamp(2, START_TIME.toEpochMilli()),
            new TopicPartition("othertopic", 0), new OffsetAndTimestamp(3, START_TIME.toEpochMilli()),
            new TopicPartition("othertopic", 1), new OffsetAndTimestamp(4, START_TIME.toEpochMilli()));
    when(offsetsRepository.load(any())).thenReturn(offsetMap);
    when(consumer.poll(anyLong())).thenReturn(EMPTY_CHANGES);
    KafkaPoller poller = new KafkaPoller(consumer, uris, START_TIME, BATCH_SIZE, topics, offsetsRepository, false, new MetricRegistry());
    Batch batch = poller.firstBatch();
    // Should not call offsetsForTimes, since all offsets come from storage
    verify(consumer, times(0)).offsetsForTimes(any());
    // We assign to 4 partitions - 2 topics x 2 partitions
    verify(consumer, times(1)).assign(any());
    assertThat(assignArgs.getValue()).hasSize(4);
    // Verify partitions and offsets used for seek
    assertThat(seekTopics.getAllValues()).containsExactlyInAnyOrderElementsOf(offsetMap.keySet());
    List<Long> offsets = offsetMap.values().stream().map(o -> o.offset()).collect(toList());
    assertThat(seekOffsets.getAllValues()).containsExactlyInAnyOrderElementsOf(offsets);
}
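The last two assertions check the seek partitions and seek offsets independently, so a transposed partition/offset pair would still pass. If stricter pairing were wanted, the two captors could be zipped into a map (Mockito records captured values in call order, so the i-th entries of both captors belong to the same seek() call) and compared against the stored offsets directly; a sketch using only names already present in this test:

Map<TopicPartition, Long> seeks = new HashMap<>();
for (int i = 0; i < seekTopics.getAllValues().size(); i++) {
    // Pair each seeked partition with the offset it was seeked to
    seeks.put(seekTopics.getAllValues().get(i), seekOffsets.getAllValues().get(i));
}
Map<TopicPartition, Long> expected = offsetMap.entrySet().stream()
        .collect(Collectors.toMap(Entry::getKey, e -> e.getValue().offset()));
assertThat(seeks).isEqualTo(expected);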