
Example 1 with START_TIME

Use of org.wikidata.query.rdf.tool.change.events.ChangeEventFixtures.START_TIME in project wikidata-query-rdf by wikimedia.

From class KafkaPollerUnitTest, method storedOffsetsFromBoth:

@Test
public void storedOffsetsFromBoth() throws RetryableException {
    // Scenario where all offsets are loaded from both storage and timestamp
    Collection<String> topics = ImmutableList.of("topictest", "othertopic", "thirdtopic");
    KafkaOffsetsRepository offsetsRepository = mock(KafkaOffsetsRepository.class);
    createTopicPartitions(1);
    // capture args for assign
    ArgumentCaptor<Collection<TopicPartition>> assignArgs = ArgumentCaptor.forClass((Class) Collection.class);
    doNothing().when(consumer).assign(assignArgs.capture());
    // capture args for seek
    ArgumentCaptor<TopicPartition> seekTopics = ArgumentCaptor.forClass(TopicPartition.class);
    ArgumentCaptor<Long> seekOffsets = ArgumentCaptor.forClass(Long.class);
    doNothing().when(consumer).seek(seekTopics.capture(), seekOffsets.capture());
    // Stored offsets cover only two of the three topics; "thirdtopic" has no stored offset
    Map<TopicPartition, OffsetAndTimestamp> offsetMap = ImmutableMap.of(new TopicPartition("topictest", 0), new OffsetAndTimestamp(1, START_TIME.toEpochMilli()), new TopicPartition("othertopic", 0), new OffsetAndTimestamp(3, START_TIME.toEpochMilli()));
    when(offsetsRepository.load(any())).thenReturn(offsetMap);
    // Timestamp-driven offsets
    when(consumer.offsetsForTimes(any())).thenAnswer(i -> {
        Map<TopicPartition, Long> map = i.getArgumentAt(0, Map.class);
        // Check that timestamps are OK
        map.forEach((k, v) -> assertThat(v).isEqualTo(START_TIME.toEpochMilli()));
        // All offsets are 500
        return map.entrySet().stream().collect(Collectors.toMap(Entry::getKey, l -> new OffsetAndTimestamp(500L, l.getValue())));
    });
    when(consumer.poll(anyLong())).thenReturn(EMPTY_CHANGES);
    KafkaPoller poller = new KafkaPoller(consumer, uris, START_TIME, BATCH_SIZE, topics, offsetsRepository, false, new MetricRegistry());
    Batch batch = poller.firstBatch();
    // offsetsForTimes is called exactly once, for the one partition ("thirdtopic") missing from storage
    verify(consumer, times(1)).offsetsForTimes(any());
    // We assign to 3 partitions - 3 topics x 1 partition
    verify(consumer, times(1)).assign(any());
    assertThat(assignArgs.getValue()).hasSize(topics.size());
    // Offsets 1 and 3 come from storage; 500 comes from the timestamp lookup
    assertThat(seekOffsets.getAllValues()).hasSize(topics.size()).contains(500L);
}
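
The scenario above mixes both offset sources: storage supplies offsets 1 and 3, while the timestamp lookup supplies 500 for the one partition storage does not know about. A minimal sketch of that resolution logic follows; the method name resolveFirstOffsets is hypothetical, KafkaOffsetsRepository.load() is assumed to take the start Instant (matching the load(any()) stub above), and this is illustrative rather than KafkaPoller's actual implementation.

// Illustrative sketch only - not KafkaPoller's actual code.
// Requires java.time.Instant in addition to the imports listed below.
Map<TopicPartition, OffsetAndTimestamp> resolveFirstOffsets(
        KafkaConsumer<String, ChangeEvent> consumer,
        KafkaOffsetsRepository repository,
        Collection<TopicPartition> partitions,
        Instant start) {
    // Offsets persisted in storage win outright
    Map<TopicPartition, OffsetAndTimestamp> resolved = new HashMap<>(repository.load(start));
    // Anything storage does not cover is resolved from the start timestamp
    Map<TopicPartition, Long> missing = partitions.stream()
            .filter(tp -> !resolved.containsKey(tp))
            .collect(Collectors.toMap(tp -> tp, tp -> start.toEpochMilli()));
    if (!missing.isEmpty()) {
        // A real poller must also handle null values here (no record after the
        // timestamp) - that is the seekToEnd path exercised in Example 2
        resolved.putAll(consumer.offsetsForTimes(missing));
    }
    return resolved;
}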
Imports used by KafkaPollerUnitTest (shared by all three examples):

import static java.util.Collections.singleton;
import static java.util.Collections.singletonList;
import static java.util.stream.Collectors.toList;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyLong;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import static org.wikidata.query.rdf.tool.change.events.ChangeEventFixtures.DOMAIN;
import static org.wikidata.query.rdf.tool.change.events.ChangeEventFixtures.START_TIME;
import static org.wikidata.query.rdf.tool.change.events.ChangeEventFixtures.makeDeleteEvent;
import static org.wikidata.query.rdf.tool.change.events.ChangeEventFixtures.makeRCEvent;

import com.codahale.metrics.MetricRegistry;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import java.net.URI;
import java.net.URISyntaxException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.clients.consumer.OffsetAndTimestamp;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.record.TimestampType;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.ArgumentCaptor;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnitRunner;
import org.wikidata.query.rdf.tool.change.KafkaPoller.Batch;
import org.wikidata.query.rdf.tool.change.events.ChangeEvent;
import org.wikidata.query.rdf.tool.exception.RetryableException;
import org.wikidata.query.rdf.tool.wikibase.WikibaseRepository.Uris;
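
The examples also rely on fixtures from the test class that this page does not show: the mocked consumer field, the uris and BATCH_SIZE values, the EMPTY_CHANGES constant, and the createTopicPartitions helper. A plausible reconstruction of the mock-related pieces, assuming the helper does nothing beyond stubbing consumer.partitionsFor() (illustrative guesses, not the project's exact code):

@Mock
private KafkaConsumer<String, ChangeEvent> consumer;

// No records at all - poll() returns this so firstBatch() comes back empty
private static final ConsumerRecords<String, ChangeEvent> EMPTY_CHANGES =
        new ConsumerRecords<>(Collections.emptyMap());

private void createTopicPartitions(int count) {
    createTopicPartitions(count, ArgumentCaptor.forClass(String.class));
}

// Overload used in Example 2: additionally captures the topic names requested
private void createTopicPartitions(int count, ArgumentCaptor<String> topics) {
    when(consumer.partitionsFor(topics.capture())).thenAnswer(i -> {
        String topic = i.getArgumentAt(0, String.class);
        List<PartitionInfo> partitions = new ArrayList<>();
        for (int p = 0; p < count; p++) {
            // Leader and replica nodes are irrelevant to these tests
            partitions.add(new PartitionInfo(topic, p, null, null, null));
        }
        return partitions;
    });
}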

Example 2 with START_TIME

Use of org.wikidata.query.rdf.tool.change.events.ChangeEventFixtures.START_TIME in project wikidata-query-rdf by wikimedia.

From class KafkaPollerUnitTest, method topicSubscribe:

@Test
public void topicSubscribe() throws RetryableException {
    Collection<String> topics = ImmutableList.of("topictest", "othertopic");
    // Each topic gets 2 partitions
    ArgumentCaptor<String> partitionArgs = ArgumentCaptor.forClass(String.class);
    createTopicPartitions(2, partitionArgs);
    // Capture args for assign
    ArgumentCaptor<Collection<TopicPartition>> assignArgs = ArgumentCaptor.forClass((Class) Collection.class);
    doNothing().when(consumer).assign(assignArgs.capture());
    when(consumer.offsetsForTimes(any())).thenAnswer(i -> {
        Map<TopicPartition, Long> map = i.getArgumentAt(0, Map.class);
        // Check that timestamps are OK
        map.forEach((k, v) -> assertThat(v).isEqualTo(START_TIME.toEpochMilli()));
        Map<TopicPartition, OffsetAndTimestamp> out = Maps.newHashMapWithExpectedSize(map.size());
        // Make offset 1000 for the first partition and nothing (null) for the second
        map.forEach((k, v) -> out.put(k, k.partition() == 0 ? new OffsetAndTimestamp(1000, v) : null));
        // Using forEach here because collect() can't handle nulls
        return out;
    });
    // capture args for seek
    ArgumentCaptor<TopicPartition> seekArgs = ArgumentCaptor.forClass(TopicPartition.class);
    doNothing().when(consumer).seek(seekArgs.capture(), eq(1000L));
    ArgumentCaptor<Collection<TopicPartition>> seekToEndArgs = ArgumentCaptor.forClass((Class) Collection.class);
    doNothing().when(consumer).seekToEnd(seekToEndArgs.capture());
    when(consumer.poll(anyLong())).thenReturn(EMPTY_CHANGES);
    KafkaPoller poller = new KafkaPoller(consumer, uris, START_TIME, BATCH_SIZE, topics, new DummyKafkaOffsetsRepository(), true, new MetricRegistry());
    Batch batch = poller.firstBatch();
    // We get partitions for both topics
    verify(consumer, times(2)).partitionsFor(any());
    assertThat(partitionArgs.getAllValues()).contains("topictest", "othertopic");
    // We assign to 4 partitions - 2 topics x 2 partitions
    verify(consumer, times(1)).assign(any());
    assertThat(assignArgs.getValue()).hasSize(4);
    // Calling seek on both topics, partition 0
    verify(consumer, times(2)).seek(any(), anyLong());
    assertThat(seekArgs.getAllValues()).extracting(topicPartition -> topicPartition.topic()).contains("topictest", "othertopic");
    assertThat(seekArgs.getAllValues()).extracting(tp -> tp.partition()).hasSize(2).containsOnly(0);
    // Calling seekToEnd on both topics, partition 1
    verify(consumer, times(2)).seekToEnd(any());
    Collection<String> seekToEndTopics = seekToEndArgs.getAllValues().stream().flatMap(Collection::stream).map(TopicPartition::topic).collect(toList());
    assertThat(seekToEndTopics).hasSize(2).contains("topictest", "othertopic");
    Collection<Integer> seekToEndPartitions = seekToEndArgs.getAllValues().stream().flatMap(Collection::stream).map(TopicPartition::partition).distinct().collect(toList());
    assertThat(seekToEndPartitions).hasSize(1).contains(1);
    verify(consumer, times(1)).offsetsForTimes(any());
}
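
Unlike the other examples, this test passes a DummyKafkaOffsetsRepository, whose definition is not shown on this page. Presumably its load() returns an empty map, so every partition takes the offsetsForTimes()/seekToEnd path; a mock with the same effect would be (an assumption, not the project's class):

// Assumed stand-in for DummyKafkaOffsetsRepository: nothing is ever stored
KafkaOffsetsRepository noStoredOffsets = mock(KafkaOffsetsRepository.class);
when(noStoredOffsets.load(any())).thenReturn(ImmutableMap.of());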

Example 3 with START_TIME

Use of org.wikidata.query.rdf.tool.change.events.ChangeEventFixtures.START_TIME in project wikidata-query-rdf by wikimedia.

From class KafkaPollerUnitTest, method storedOffsetsFromStorage:

@Test
public void storedOffsetsFromStorage() throws RetryableException {
    // Scenario where all offsets are loaded from storage
    Collection<String> topics = ImmutableList.of("topictest", "othertopic");
    KafkaOffsetsRepository offsetsRepository = mock(KafkaOffsetsRepository.class);
    createTopicPartitions(2);
    // capture args for assign
    ArgumentCaptor<Collection<TopicPartition>> assignArgs = ArgumentCaptor.forClass((Class) Collection.class);
    doNothing().when(consumer).assign(assignArgs.capture());
    // capture args for seek
    ArgumentCaptor<TopicPartition> seekTopics = ArgumentCaptor.forClass(TopicPartition.class);
    ArgumentCaptor<Long> seekOffsets = ArgumentCaptor.forClass(Long.class);
    doNothing().when(consumer).seek(seekTopics.capture(), seekOffsets.capture());
    Map<TopicPartition, OffsetAndTimestamp> offsetMap = ImmutableMap.of(new TopicPartition("topictest", 0), new OffsetAndTimestamp(1, START_TIME.toEpochMilli()), new TopicPartition("topictest", 1), new OffsetAndTimestamp(2, START_TIME.toEpochMilli()), new TopicPartition("othertopic", 0), new OffsetAndTimestamp(3, START_TIME.toEpochMilli()), new TopicPartition("othertopic", 1), new OffsetAndTimestamp(4, START_TIME.toEpochMilli()));
    when(offsetsRepository.load(any())).thenReturn(offsetMap);
    when(consumer.poll(anyLong())).thenReturn(EMPTY_CHANGES);
    KafkaPoller poller = new KafkaPoller(consumer, uris, START_TIME, BATCH_SIZE, topics, offsetsRepository, false, new MetricRegistry());
    Batch batch = poller.firstBatch();
    // should not call offsetsForTimes, since all offsets are in store
    verify(consumer, times(0)).offsetsForTimes(any());
    // We assign to 4 partitions - 2 topics x 2 partitions
    verify(consumer, times(1)).assign(any());
    assertThat(assignArgs.getValue()).hasSize(4);
    // Verify topics and offsets
    assertThat(seekTopics.getAllValues()).containsExactlyInAnyOrderElementsOf(offsetMap.keySet());
    List<Long> offsets = offsetMap.values().stream().map(o -> o.offset()).collect(toList());
    assertThat(seekOffsets.getAllValues()).containsExactlyInAnyOrderElementsOf(offsets);
}
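
Spelled out in plain consumer terms, the interaction these verifications pin down amounts to the following (a sketch of the expected calls, not KafkaPoller's code):

// Expected consumer interaction when storage covers every partition
consumer.assign(offsetMap.keySet());                               // all 2 x 2 partitions
offsetMap.forEach((tp, oat) -> consumer.seek(tp, oat.offset()));   // offsets 1-4 from storage
// consumer.offsetsForTimes(...) is never consulted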
