Example 1 with Uris

Use of org.wikidata.query.rdf.tool.wikibase.WikibaseRepository.Uris in project wikidata-query-rdf by wikimedia.

From class KafkaPollerUnitTest, method storedOffsetsFromBoth.

@Test
public void storedOffsetsFromBoth() throws RetryableException {
    // Scenario where all offsets are loaded from both storage and timestamp
    Collection<String> topics = ImmutableList.of("topictest", "othertopic", "thirdtopic");
    KafkaOffsetsRepository offsetsRepository = mock(KafkaOffsetsRepository.class);
    createTopicPartitions(1);
    // capture args for assign
    ArgumentCaptor<Collection<TopicPartition>> assignArgs = ArgumentCaptor.forClass((Class) Collection.class);
    doNothing().when(consumer).assign(assignArgs.capture());
    // capture args for seek
    ArgumentCaptor<TopicPartition> seekTopics = ArgumentCaptor.forClass(TopicPartition.class);
    ArgumentCaptor<Long> seekOffsets = ArgumentCaptor.forClass(Long.class);
    doNothing().when(consumer).seek(seekTopics.capture(), seekOffsets.capture());
    // Stored offsets
    Map<TopicPartition, OffsetAndTimestamp> offsetMap = ImmutableMap.of(new TopicPartition("topictest", 0), new OffsetAndTimestamp(1, START_TIME.toEpochMilli()), new TopicPartition("othertopic", 0), new OffsetAndTimestamp(3, START_TIME.toEpochMilli()));
    when(offsetsRepository.load(any())).thenReturn(offsetMap);
    // Timestamp-driven offsets
    when(consumer.offsetsForTimes(any())).thenAnswer(i -> {
        Map<TopicPartition, Long> map = i.getArgumentAt(0, Map.class);
        // Check that timestamps are OK
        map.forEach((k, v) -> assertThat(v).isEqualTo(START_TIME.toEpochMilli()));
        // All offsets are 500
        return map.entrySet().stream().collect(Collectors.toMap(Entry::getKey, l -> new OffsetAndTimestamp(500L, l.getValue())));
    });
    when(consumer.poll(anyLong())).thenReturn(EMPTY_CHANGES);
    KafkaPoller poller = new KafkaPoller(consumer, uris, START_TIME, BATCH_SIZE, topics, offsetsRepository, false, new MetricRegistry());
    Batch batch = poller.firstBatch();
    // offsetsForTimes is called once, for "thirdtopic", whose offset is not in storage
    verify(consumer, times(1)).offsetsForTimes(any());
    // We assign once, to 3 partitions (3 topics x 1 partition each)
    verify(consumer, times(1)).assign(any());
    assertThat(assignArgs.getValue()).hasSize(topics.size());
    // 500 is the offset that came from the timestamp lookup rather than storage
    assertThat(seekOffsets.getAllValues()).hasSize(topics.size()).contains(500L);
}
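
The tests on this page rely on a createTopicPartitions helper that is not shown here. A plausible reconstruction, assuming it simply stubs consumer.partitionsFor with Mockito (the names and null leader/replica arguments below are guesses, not the project's actual helper):

private void createTopicPartitions(int count) {
    // Hypothetical reconstruction; the real helper in KafkaPollerUnitTest may differ
    createTopicPartitions(count, ArgumentCaptor.forClass(String.class));
}

private void createTopicPartitions(int count, ArgumentCaptor<String> partitionArgs) {
    // Every topic the poller asks about reports `count` partitions, numbered 0..count-1
    when(consumer.partitionsFor(partitionArgs.capture())).thenAnswer(i -> {
        String topic = i.getArgumentAt(0, String.class);
        List<PartitionInfo> partitions = new ArrayList<>(count);
        for (int partition = 0; partition < count; partition++) {
            partitions.add(new PartitionInfo(topic, partition, null, null, null));
        }
        return partitions;
    });
}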

Example 2 with Uris

Use of org.wikidata.query.rdf.tool.wikibase.WikibaseRepository.Uris in project wikidata-query-rdf by wikimedia.

From class KafkaPollerUnitTest, method topicSubscribe.

@Test
public void topicSubscribe() throws RetryableException {
    Collection<String> topics = ImmutableList.of("topictest", "othertopic");
    // Each topic gets 2 partitions
    ArgumentCaptor<String> partitionArgs = ArgumentCaptor.forClass(String.class);
    createTopicPartitions(2, partitionArgs);
    // Capture args for assign
    ArgumentCaptor<Collection<TopicPartition>> assignArgs = ArgumentCaptor.forClass((Class) Collection.class);
    doNothing().when(consumer).assign(assignArgs.capture());
    when(consumer.offsetsForTimes(any())).thenAnswer(i -> {
        Map<TopicPartition, Long> map = i.getArgumentAt(0, Map.class);
        // Check that timestamps are OK
        map.forEach((k, v) -> assertThat(v).isEqualTo(START_TIME.toEpochMilli()));
        Map<TopicPartition, OffsetAndTimestamp> out = Maps.newHashMapWithExpectedSize(map.size());
        // Resolve offset 1000 for partition 0 and null (no offset) for partition 1;
        // forEach is used because Collectors.toMap cannot handle null values
        map.forEach((k, v) -> out.put(k, k.partition() == 0 ? new OffsetAndTimestamp(1000, v) : null));
        return out;
    });
    // capture args for seek
    ArgumentCaptor<TopicPartition> seekArgs = ArgumentCaptor.forClass(TopicPartition.class);
    doNothing().when(consumer).seek(seekArgs.capture(), eq(1000L));
    ArgumentCaptor<Collection<TopicPartition>> seekToEndArgs = ArgumentCaptor.forClass((Class) Collection.class);
    doNothing().when(consumer).seekToEnd(seekToEndArgs.capture());
    when(consumer.poll(anyLong())).thenReturn(EMPTY_CHANGES);
    KafkaPoller poller = new KafkaPoller(consumer, uris, START_TIME, BATCH_SIZE, topics, new DummyKafkaOffsetsRepository(), true, new MetricRegistry());
    Batch batch = poller.firstBatch();
    // We get partitions for both topics
    verify(consumer, times(2)).partitionsFor(any());
    assertThat(partitionArgs.getAllValues()).contains("topictest", "othertopic");
    // We assign once, to 4 partitions (2 topics x 2 partitions)
    verify(consumer, times(1)).assign(any());
    assertThat(assignArgs.getValue()).hasSize(4);
    // Calling seek on both topics, partition 0
    verify(consumer, times(2)).seek(any(), anyLong());
    assertThat(seekArgs.getAllValues()).extracting(TopicPartition::topic).contains("topictest", "othertopic");
    assertThat(seekArgs.getAllValues()).extracting(TopicPartition::partition).hasSize(2).containsOnly(0);
    // Calling seekToEnd on both topics, partition 1
    verify(consumer, times(2)).seekToEnd(any());
    Collection<String> seekToEndTopics = seekToEndArgs.getAllValues().stream().flatMap(Collection::stream).map(TopicPartition::topic).collect(toList());
    assertThat(seekToEndTopics).hasSize(2).contains("topictest", "othertopic");
    Collection<Integer> seekToEndPartitions = seekToEndArgs.getAllValues().stream().flatMap(Collection::stream).map(TopicPartition::partition).distinct().collect(toList());
    assertThat(seekToEndPartitions).hasSize(1).contains(1);
    verify(consumer, times(1)).offsetsForTimes(any());
}
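
The verifications above pin down the poller's seek strategy: partitions that offsetsForTimes resolves get an explicit seek, and the rest fall back to seekToEnd. A minimal sketch of that strategy, inferred from the test rather than taken from the actual KafkaPoller code:

private void seekToResolvedOffsets(KafkaConsumer<String, ChangeEvent> consumer, Map<TopicPartition, Long> timestamps) {
    // Hypothetical sketch; KafkaPoller's real logic may differ in detail
    Map<TopicPartition, OffsetAndTimestamp> resolved = consumer.offsetsForTimes(timestamps);
    List<TopicPartition> unresolved = new ArrayList<>();
    resolved.forEach((partition, offset) -> {
        if (offset != null) {
            // Known offset for the requested timestamp: seek straight to it
            consumer.seek(partition, offset.offset());
        } else {
            unresolved.add(partition);
        }
    });
    if (!unresolved.isEmpty()) {
        // No offset for the timestamp: start these partitions at the latest offset
        consumer.seekToEnd(unresolved);
    }
}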

Example 3 with Uris

Use of org.wikidata.query.rdf.tool.wikibase.WikibaseRepository.Uris in project wikidata-query-rdf by wikimedia.

From class KafkaPollerUnitTest, method storedOffsetsFromStorage.

@Test
public void storedOffsetsFromStorage() throws RetryableException {
    // Scenario where all offsets are loaded from storage
    Collection<String> topics = ImmutableList.of("topictest", "othertopic");
    KafkaOffsetsRepository offsetsRepository = mock(KafkaOffsetsRepository.class);
    createTopicPartitions(2);
    // capture args for assign
    ArgumentCaptor<Collection<TopicPartition>> assignArgs = ArgumentCaptor.forClass((Class) Collection.class);
    doNothing().when(consumer).assign(assignArgs.capture());
    // capture args for seek
    ArgumentCaptor<TopicPartition> seekTopics = ArgumentCaptor.forClass(TopicPartition.class);
    ArgumentCaptor<Long> seekOffsets = ArgumentCaptor.forClass(Long.class);
    doNothing().when(consumer).seek(seekTopics.capture(), seekOffsets.capture());
    Map<TopicPartition, OffsetAndTimestamp> offsetMap = ImmutableMap.of(new TopicPartition("topictest", 0), new OffsetAndTimestamp(1, START_TIME.toEpochMilli()), new TopicPartition("topictest", 1), new OffsetAndTimestamp(2, START_TIME.toEpochMilli()), new TopicPartition("othertopic", 0), new OffsetAndTimestamp(3, START_TIME.toEpochMilli()), new TopicPartition("othertopic", 1), new OffsetAndTimestamp(4, START_TIME.toEpochMilli()));
    when(offsetsRepository.load(any())).thenReturn(offsetMap);
    when(consumer.poll(anyLong())).thenReturn(EMPTY_CHANGES);
    KafkaPoller poller = new KafkaPoller(consumer, uris, START_TIME, BATCH_SIZE, topics, offsetsRepository, false, new MetricRegistry());
    Batch batch = poller.firstBatch();
    // offsetsForTimes should not be called, since all offsets came from storage
    verify(consumer, times(0)).offsetsForTimes(any());
    // We assign once, to 4 partitions (2 topics x 2 partitions)
    verify(consumer, times(1)).assign(any());
    assertThat(assignArgs.getValue()).hasSize(4);
    // Verify topics and offsets
    assertThat(seekTopics.getAllValues()).containsExactlyInAnyOrderElementsOf(offsetMap.keySet());
    List<Long> offsets = offsetMap.values().stream().map(OffsetAndTimestamp::offset).collect(toList());
    assertThat(seekOffsets.getAllValues()).containsExactlyInAnyOrderElementsOf(offsets);
}
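
Taken together with example 1, this test fixes the offset-resolution rule: offsets found in storage win outright, and only the partitions missing from storage go through offsetsForTimes (hence times(0) here and times(1) above). A sketch of that rule, under the repository shape inferred from these tests and not the project's actual code:

private Map<TopicPartition, OffsetAndTimestamp> resolveOffsets(Collection<TopicPartition> partitions, Instant firstStartTime) {
    // Stored offsets take precedence
    Map<TopicPartition, OffsetAndTimestamp> resolved = new HashMap<>(offsetsRepository.load(firstStartTime));
    // Only the partitions with no stored offset go to the timestamp lookup
    Map<TopicPartition, Long> missing = new HashMap<>();
    for (TopicPartition partition : partitions) {
        if (!resolved.containsKey(partition)) {
            missing.put(partition, firstStartTime.toEpochMilli());
        }
    }
    if (!missing.isEmpty()) {
        resolved.putAll(consumer.offsetsForTimes(missing));
    }
    return resolved;
}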

Example 4 with Uris

Use of org.wikidata.query.rdf.tool.wikibase.WikibaseRepository.Uris in project wikidata-query-rdf by wikimedia.

From class RdfKafkaRepositoryIntegrationTest, method readWriteOffsets.

@Test
public void readWriteOffsets() throws Exception {
    Uris uris = new Uris(new URI("https://acme.test"), singleton(0L), "/api.php", "/entitydata");
    Instant startTime = Instant.ofEpochMilli(BEGIN_DATE);
    HttpClient httpClient = buildHttpClient(getHttpProxyHost(), getHttpProxyPort());
    RdfClient rdfClient = new RdfClient(httpClient, url("/namespace/wdq/sparql"), buildHttpClientRetryer(), Duration.of(-1, SECONDS));
    try {
        rdfClient.update("CLEAR ALL");
        KafkaOffsetsRepository kafkaOffsetsRepository = new RdfKafkaOffsetsRepository(uris.builder().build(), rdfClient);
        Map<TopicPartition, OffsetAndMetadata> offsets = new HashMap<>();
        offsets.put(new TopicPartition("topictest", 0), new OffsetAndMetadata(1L));
        offsets.put(new TopicPartition("othertopic", 0), new OffsetAndMetadata(2L));
        kafkaOffsetsRepository.store(offsets);
        Map<TopicPartition, OffsetAndTimestamp> offsetsAndTimestamps = kafkaOffsetsRepository.load(startTime);
        assertThat(offsetsAndTimestamps.get(new TopicPartition("topictest", 0)).offset()).isEqualTo(1L);
        assertThat(offsetsAndTimestamps.get(new TopicPartition("othertopic", 0)).offset()).isEqualTo(2L);
        offsets = new HashMap<>();
        offsets.put(new TopicPartition("topictest", 0), new OffsetAndMetadata(3L));
        offsets.put(new TopicPartition("othertopic", 0), new OffsetAndMetadata(4L));
        kafkaOffsetsRepository.store(offsets);
        offsetsAndTimestamps = kafkaOffsetsRepository.load(startTime);
        assertThat(offsetsAndTimestamps.get(new TopicPartition("topictest", 0)).offset()).isEqualTo(3L);
        assertThat(offsetsAndTimestamps.get(new TopicPartition("othertopic", 0)).offset()).isEqualTo(4L);
    } finally {
        rdfClient.update("CLEAR ALL");
        httpClient.stop();
    }
}
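
From the store and load calls above one can infer the repository contract. A hedged sketch of the interface (the project's actual KafkaOffsetsRepository may declare it differently):

import java.time.Instant;
import java.util.Map;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.clients.consumer.OffsetAndTimestamp;
import org.apache.kafka.common.TopicPartition;

public interface KafkaOffsetsRepository {
    // Persist the given offsets, in the RDF implementation as triples in the store
    void store(Map<TopicPartition, OffsetAndMetadata> offsets);

    // Load previously stored offsets; the timestamp serves as a fallback
    // starting point for partitions with nothing stored
    Map<TopicPartition, OffsetAndTimestamp> load(Instant firstStartTime);
}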

Example 5 with Uris

Use of org.wikidata.query.rdf.tool.wikibase.WikibaseRepository.Uris in project wikidata-query-rdf by wikimedia.

From class KafkaPollerEventConsumptionUnitTest, method initPoller.

private void initPoller(String... clusterNames) {
    Uris uris = Uris.withWikidataDefaults("https://acme.test");
    URI root = null;
    try {
        root = uris.builder().build();
    } catch (URISyntaxException e) {
        fail("failed to build UriScheme", e);
    }
    KafkaOffsetsRepository kafkaOffsetsRepository = new RdfKafkaOffsetsRepository(root, null);
    Map<String, Class<? extends ChangeEvent>> topicsToClass = KafkaPoller.clusterNamesAwareTopics(Arrays.asList(clusterNames));
    deserializer = new JsonDeserializer<>(topicsToClass);
    poller = new KafkaPoller(consumer, uris, Instant.now(), 100, topicsToClass.keySet(), kafkaOffsetsRepository, true, new MetricRegistry());
}
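
For context, clusterNamesAwareTopics expands the poller's base topic set once per cluster name. Assuming a "<cluster>.<topic>" prefixing scheme (an assumption about the naming, not verified against the project), a call would look like:

// Illustrative only; the resulting topic names assume a "<cluster>.<topic>" scheme
Map<String, Class<? extends ChangeEvent>> topicsToClass =
        KafkaPoller.clusterNamesAwareTopics(Arrays.asList("cluster1", "cluster2"));
// Each base topic then appears once per cluster name, and topicsToClass.keySet()
// is what the poller subscribes to, as in initPoller above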

Aggregations

URI (java.net.URI): 5 examples
MetricRegistry (com.codahale.metrics.MetricRegistry): 4 examples
URISyntaxException (java.net.URISyntaxException): 4 examples
HashMap (java.util.HashMap): 4 examples
OffsetAndMetadata (org.apache.kafka.clients.consumer.OffsetAndMetadata): 4 examples
OffsetAndTimestamp (org.apache.kafka.clients.consumer.OffsetAndTimestamp): 4 examples
TopicPartition (org.apache.kafka.common.TopicPartition): 4 examples
Test (org.junit.Test): 4 examples
ImmutableList (com.google.common.collect.ImmutableList): 3 examples
ImmutableMap (com.google.common.collect.ImmutableMap): 3 examples
Maps (com.google.common.collect.Maps): 3 examples
Duration (java.time.Duration): 3 examples
ArrayList (java.util.ArrayList): 3 examples
Arrays (java.util.Arrays): 3 examples
Collection (java.util.Collection): 3 examples
Collections (java.util.Collections): 3 examples
Collections.singleton (java.util.Collections.singleton): 3 examples
Collections.singletonList (java.util.Collections.singletonList): 3 examples
List (java.util.List): 3 examples
Map (java.util.Map): 3 examples