Use of org.wikidata.query.rdf.tool.wikibase.WikibaseRepository.Uris in project wikidata-query-rdf by wikimedia.
The class KafkaPollerUnitTest, method storedOffsetsFromBoth.
@Test
public void storedOffsetsFromBoth() throws RetryableException {
    // Scenario where offsets come partly from storage and partly from timestamps
    Collection<String> topics = ImmutableList.of("topictest", "othertopic", "thirdtopic");
    KafkaOffsetsRepository offsetsRepository = mock(KafkaOffsetsRepository.class);
    createTopicPartitions(1);
    // Capture args for assign
    ArgumentCaptor<Collection<TopicPartition>> assignArgs = ArgumentCaptor.forClass((Class) Collection.class);
    doNothing().when(consumer).assign(assignArgs.capture());
    // Capture args for seek
    ArgumentCaptor<TopicPartition> seekTopics = ArgumentCaptor.forClass(TopicPartition.class);
    ArgumentCaptor<Long> seekOffsets = ArgumentCaptor.forClass(Long.class);
    doNothing().when(consumer).seek(seekTopics.capture(), seekOffsets.capture());
    // Stored offsets cover "topictest" and "othertopic" but not "thirdtopic"
    Map<TopicPartition, OffsetAndTimestamp> offsetMap = ImmutableMap.of(
            new TopicPartition("topictest", 0), new OffsetAndTimestamp(1, START_TIME.toEpochMilli()),
            new TopicPartition("othertopic", 0), new OffsetAndTimestamp(3, START_TIME.toEpochMilli()));
    when(offsetsRepository.load(any())).thenReturn(offsetMap);
    // Timestamp-driven offsets
    when(consumer.offsetsForTimes(any())).thenAnswer(i -> {
        Map<TopicPartition, Long> map = i.getArgumentAt(0, Map.class);
        // Check that timestamps are OK
        map.forEach((k, v) -> assertThat(v).isEqualTo(START_TIME.toEpochMilli()));
        // All timestamp-resolved offsets are 500
        return map.entrySet().stream()
                .collect(Collectors.toMap(Entry::getKey, l -> new OffsetAndTimestamp(500L, l.getValue())));
    });
    when(consumer.poll(anyLong())).thenReturn(EMPTY_CHANGES);
    KafkaPoller poller = new KafkaPoller(consumer, uris, START_TIME, BATCH_SIZE, topics, offsetsRepository, false, new MetricRegistry());
    Batch batch = poller.firstBatch();
    // offsetsForTimes is called once, for the topic that has no stored offset
    verify(consumer, times(1)).offsetsForTimes(any());
    // We assign once, to 3 partitions (3 topics x 1 partition each)
    verify(consumer, times(1)).assign(any());
    assertThat(assignArgs.getValue()).hasSize(topics.size());
    // The seek offsets include the timestamp-derived offset 500 for "thirdtopic"
    assertThat(seekOffsets.getAllValues()).hasSize(topics.size()).contains(500L);
}
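The createTopicPartitions helper called in these tests is not part of this listing. A minimal sketch of what it presumably does, stubbing the mocked consumer's partitionsFor so that every topic reports the requested number of partitions (the helper name and arity come from the calls above; the body, and the use of org.apache.kafka.common.PartitionInfo with null leader/replica metadata, are assumptions):

    // Sketch only: stub partitionsFor(topic) to return `count` partitions per topic.
    // Leader/replica nodes are irrelevant to these tests, so nulls are passed.
    private void createTopicPartitions(int count) {
        when(consumer.partitionsFor(any())).thenAnswer(inv -> {
            String topic = inv.getArgumentAt(0, String.class);
            List<PartitionInfo> partitions = new ArrayList<>(count);
            for (int p = 0; p < count; p++) {
                partitions.add(new PartitionInfo(topic, p, null, null, null));
            }
            return partitions;
        });
    }

The two-argument overload used in topicSubscribe below would additionally capture topic names by stubbing with topicCaptor.capture() instead of any().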
Use of org.wikidata.query.rdf.tool.wikibase.WikibaseRepository.Uris in project wikidata-query-rdf by wikimedia.
The class KafkaPollerUnitTest, method topicSubscribe.
@Test
public void topicSubscribe() throws RetryableException {
    Collection<String> topics = ImmutableList.of("topictest", "othertopic");
    // Each topic gets 2 partitions
    ArgumentCaptor<String> partitionArgs = ArgumentCaptor.forClass(String.class);
    createTopicPartitions(2, partitionArgs);
    // Capture args for assign
    ArgumentCaptor<Collection<TopicPartition>> assignArgs = ArgumentCaptor.forClass((Class) Collection.class);
    doNothing().when(consumer).assign(assignArgs.capture());
    when(consumer.offsetsForTimes(any())).thenAnswer(i -> {
        Map<TopicPartition, Long> map = i.getArgumentAt(0, Map.class);
        // Check that timestamps are OK
        map.forEach((k, v) -> assertThat(v).isEqualTo(START_TIME.toEpochMilli()));
        Map<TopicPartition, OffsetAndTimestamp> out = Maps.newHashMapWithExpectedSize(map.size());
        // Return offset 1000 for partition 0 and nothing (null) for partition 1;
        // using forEach here because collect() can't handle null values
        map.forEach((k, v) -> out.put(k, k.partition() == 0 ? new OffsetAndTimestamp(1000, v) : null));
        return out;
    });
    // Capture args for seek
    ArgumentCaptor<TopicPartition> seekArgs = ArgumentCaptor.forClass(TopicPartition.class);
    doNothing().when(consumer).seek(seekArgs.capture(), eq(1000L));
    // Capture args for seekToEnd (used for partitions without a resolved offset)
    ArgumentCaptor<Collection<TopicPartition>> seekBeginningArgs = ArgumentCaptor.forClass((Class) Collection.class);
    doNothing().when(consumer).seekToEnd(seekBeginningArgs.capture());
    when(consumer.poll(anyLong())).thenReturn(EMPTY_CHANGES);
    KafkaPoller poller = new KafkaPoller(consumer, uris, START_TIME, BATCH_SIZE, topics, new DummyKafkaOffsetsRepository(), true, new MetricRegistry());
    Batch batch = poller.firstBatch();
    // We get partitions for both topics
    verify(consumer, times(2)).partitionsFor(any());
    assertThat(partitionArgs.getAllValues()).contains("topictest", "othertopic");
    // We assign once, to 4 partitions - 2 topics x 2 partitions
    verify(consumer, times(1)).assign(any());
    assertThat(assignArgs.getValue()).hasSize(4);
    // seek is called for partition 0 of both topics
    verify(consumer, times(2)).seek(any(), anyLong());
    assertThat(seekArgs.getAllValues()).extracting(TopicPartition::topic).contains("topictest", "othertopic");
    assertThat(seekArgs.getAllValues()).extracting(TopicPartition::partition).hasSize(2).containsOnly(0);
    // seekToEnd is called for partition 1 of both topics
    verify(consumer, times(2)).seekToEnd(any());
    Collection<String> sbTopics = seekBeginningArgs.getAllValues().stream()
            .flatMap(Collection::stream)
            .map(TopicPartition::topic)
            .collect(toList());
    assertThat(sbTopics).hasSize(2).contains("topictest", "othertopic");
    Collection<Integer> sbPartitions = seekBeginningArgs.getAllValues().stream()
            .flatMap(Collection::stream)
            .map(TopicPartition::partition)
            .distinct()
            .collect(toList());
    assertThat(sbPartitions).hasSize(1).contains(1);
    verify(consumer, times(1)).offsetsForTimes(any());
}
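DummyKafkaOffsetsRepository is referenced above but not shown. A minimal no-op test double consistent with how KafkaOffsetsRepository is used in this listing, load(Instant) returning stored offsets and store(Map) persisting them; the exact interface is inferred from these calls, so treat this as a sketch:

    // Sketch only: a repository that never has stored offsets, forcing the
    // poller to fall back to timestamp-based offset resolution.
    private static class DummyKafkaOffsetsRepository implements KafkaOffsetsRepository {
        @Override
        public Map<TopicPartition, OffsetAndTimestamp> load(Instant startTime) {
            return ImmutableMap.of();
        }

        @Override
        public void store(Map<TopicPartition, OffsetAndMetadata> offsets) {
            // intentionally empty: nothing is persisted in unit tests
        }
    }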
Use of org.wikidata.query.rdf.tool.wikibase.WikibaseRepository.Uris in project wikidata-query-rdf by wikimedia.
The class KafkaPollerUnitTest, method storedOffsetsFromStorage.
@Test
public void storedOffsetsFromStorage() throws RetryableException {
    // Scenario where all offsets are loaded from storage
    Collection<String> topics = ImmutableList.of("topictest", "othertopic");
    KafkaOffsetsRepository offsetsRepository = mock(KafkaOffsetsRepository.class);
    createTopicPartitions(2);
    // Capture args for assign
    ArgumentCaptor<Collection<TopicPartition>> assignArgs = ArgumentCaptor.forClass((Class) Collection.class);
    doNothing().when(consumer).assign(assignArgs.capture());
    // Capture args for seek
    ArgumentCaptor<TopicPartition> seekTopics = ArgumentCaptor.forClass(TopicPartition.class);
    ArgumentCaptor<Long> seekOffsets = ArgumentCaptor.forClass(Long.class);
    doNothing().when(consumer).seek(seekTopics.capture(), seekOffsets.capture());
    // Stored offsets for every partition of both topics
    Map<TopicPartition, OffsetAndTimestamp> offsetMap = ImmutableMap.of(
            new TopicPartition("topictest", 0), new OffsetAndTimestamp(1, START_TIME.toEpochMilli()),
            new TopicPartition("topictest", 1), new OffsetAndTimestamp(2, START_TIME.toEpochMilli()),
            new TopicPartition("othertopic", 0), new OffsetAndTimestamp(3, START_TIME.toEpochMilli()),
            new TopicPartition("othertopic", 1), new OffsetAndTimestamp(4, START_TIME.toEpochMilli()));
    when(offsetsRepository.load(any())).thenReturn(offsetMap);
    when(consumer.poll(anyLong())).thenReturn(EMPTY_CHANGES);
    KafkaPoller poller = new KafkaPoller(consumer, uris, START_TIME, BATCH_SIZE, topics, offsetsRepository, false, new MetricRegistry());
    Batch batch = poller.firstBatch();
    // Should not call offsetsForTimes, since all offsets are in the store
    verify(consumer, times(0)).offsetsForTimes(any());
    // We assign once, to 4 partitions - 2 topics x 2 partitions
    verify(consumer, times(1)).assign(any());
    assertThat(assignArgs.getValue()).hasSize(4);
    // Verify that the seeked partitions and offsets match the stored map
    assertThat(seekTopics.getAllValues()).containsExactlyInAnyOrderElementsOf(offsetMap.keySet());
    List<Long> offsets = offsetMap.values().stream().map(OffsetAndTimestamp::offset).collect(toList());
    assertThat(seekOffsets.getAllValues()).containsExactlyInAnyOrderElementsOf(offsets);
}
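The EMPTY_CHANGES constant returned from the mocked poll() calls is also not shown here. A plausible definition, assuming the consumer is typed on String keys and ChangeEvent values as suggested by the deserializer setup further below:

    // Sketch only: an empty record set, so firstBatch() sees no changes.
    private static final ConsumerRecords<String, ChangeEvent> EMPTY_CHANGES =
            new ConsumerRecords<>(Collections.emptyMap());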
Use of org.wikidata.query.rdf.tool.wikibase.WikibaseRepository.Uris in project wikidata-query-rdf by wikimedia.
The class RdfKafkaRepositoryIntegrationTest, method readWriteOffsets.
@Test
public void readWriteOffsets() throws Exception {
    Uris uris = new Uris(new URI("https://acme.test"), singleton(0L), "/api.php", "/entitydata");
    Instant startTime = Instant.ofEpochMilli(BEGIN_DATE);
    HttpClient httpClient = buildHttpClient(getHttpProxyHost(), getHttpProxyPort());
    RdfClient rdfClient = new RdfClient(httpClient, url("/namespace/wdq/sparql"), buildHttpClientRetryer(), Duration.of(-1, SECONDS));
    try {
        rdfClient.update("CLEAR ALL");
        KafkaOffsetsRepository kafkaOffsetsRepository = new RdfKafkaOffsetsRepository(uris.builder().build(), rdfClient);
        // Store an initial set of offsets and read them back
        Map<TopicPartition, OffsetAndMetadata> offsets = new HashMap<>();
        offsets.put(new TopicPartition("topictest", 0), new OffsetAndMetadata(1L));
        offsets.put(new TopicPartition("othertopic", 0), new OffsetAndMetadata(2L));
        kafkaOffsetsRepository.store(offsets);
        Map<TopicPartition, OffsetAndTimestamp> offsetsAndTimestamps = kafkaOffsetsRepository.load(startTime);
        assertThat(offsetsAndTimestamps.get(new TopicPartition("topictest", 0)).offset()).isEqualTo(1L);
        assertThat(offsetsAndTimestamps.get(new TopicPartition("othertopic", 0)).offset()).isEqualTo(2L);
        // Overwrite with new offsets and verify the update is visible
        offsets = new HashMap<>();
        offsets.put(new TopicPartition("topictest", 0), new OffsetAndMetadata(3L));
        offsets.put(new TopicPartition("othertopic", 0), new OffsetAndMetadata(4L));
        kafkaOffsetsRepository.store(offsets);
        offsetsAndTimestamps = kafkaOffsetsRepository.load(startTime);
        assertThat(offsetsAndTimestamps.get(new TopicPartition("topictest", 0)).offset()).isEqualTo(3L);
        assertThat(offsetsAndTimestamps.get(new TopicPartition("othertopic", 0)).offset()).isEqualTo(4L);
    } finally {
        rdfClient.update("CLEAR ALL");
        httpClient.stop();
    }
}
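The round trip above exercises RdfKafkaOffsetsRepository against a live SPARQL endpoint. For a purely local check of the same store/load contract, an in-memory repository along these lines could stand in (a sketch against the interface as inferred from this listing, not part of the project):

    // Sketch only: offsets are kept in a map and echoed back on load,
    // stamped with the requested start time.
    private static class InMemoryKafkaOffsetsRepository implements KafkaOffsetsRepository {
        private final Map<TopicPartition, OffsetAndMetadata> stored = new HashMap<>();

        @Override
        public void store(Map<TopicPartition, OffsetAndMetadata> offsets) {
            stored.putAll(offsets);
        }

        @Override
        public Map<TopicPartition, OffsetAndTimestamp> load(Instant startTime) {
            return stored.entrySet().stream().collect(Collectors.toMap(
                    Entry::getKey,
                    e -> new OffsetAndTimestamp(e.getValue().offset(), startTime.toEpochMilli())));
        }
    }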
Use of org.wikidata.query.rdf.tool.wikibase.WikibaseRepository.Uris in project wikidata-query-rdf by wikimedia.
The class KafkaPollerEventConsumptionUnitTest, method initPoller.
private void initPoller(String... clusterNames) {
    Uris uris = Uris.withWikidataDefaults("https://acme.test");
    URI root = null;
    try {
        root = uris.builder().build();
    } catch (URISyntaxException e) {
        fail("failed to build UriScheme", e);
    }
    KafkaOffsetsRepository kafkaOffsetsRepository = new RdfKafkaOffsetsRepository(root, null);
    Map<String, Class<? extends ChangeEvent>> topicsToClass = KafkaPoller.clusterNamesAwareTopics(Arrays.asList(clusterNames));
    deserializer = new JsonDeserializer<>(topicsToClass);
    poller = new KafkaPoller(consumer, uris, Instant.now(), 100, topicsToClass.keySet(), kafkaOffsetsRepository, true, new MetricRegistry());
}
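initPoller takes zero or more cluster names; exactly how KafkaPoller.clusterNamesAwareTopics derives topic names from them is not shown in this listing. A hedged usage example (the cluster names are purely illustrative):

    // Hypothetical invocations from a test method:
    initPoller();                          // default topics, no cluster prefixes
    initPoller("cluster-a", "cluster-b");  // topics derived for each named cluster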