
Example 6 with MutationEventData

Use of org.wikidata.query.rdf.updater.MutationEventData in project wikidata-query-rdf by wikimedia.

From the class KafkaStreamConsumerMetricsListenerUnitTest, method test_metrics_are_reported:

@Test
public void test_metrics_are_reported() {
    Instant now = Instant.now();
    Clock fixedClock = Clock.fixed(now, ZoneOffset.UTC);
    Duration lagEvt1 = Duration.ofHours(2);
    Duration lagEvt2 = Duration.ofHours(1);
    Instant evTime1 = now.minus(lagEvt1);
    Instant evTime2 = now.minus(lagEvt2);
    // Two single-chunk import events whose event times lag the frozen clock by 2h and 1h.
    MutationEventData msg1 = new DiffEventData(
            new EventsMeta(Instant.now(), "unused", "domain", "stream", "req"),
            "Q0", 1, evTime1, 0, 1, MutationEventData.IMPORT_OPERATION,
            new RDFDataChunk("\n<uri:a> <uri:a> <uri:a> .\n", RDFFormat.TURTLE.getDefaultMIMEType()),
            null, null, null);
    MutationEventData msg2 = new DiffEventData(
            new EventsMeta(Instant.now(), "unused", "domain", "stream", "req"),
            "Q0", 2, evTime2, 0, 1, MutationEventData.IMPORT_OPERATION,
            new RDFDataChunk("\n<uri:b> <uri:b> <uri:b> .\n", RDFFormat.TURTLE.getDefaultMIMEType()),
            null, null, null);
    TopicPartition topicPartition = new TopicPartition("topic", 0);
    // Consecutive stubbing: the first poll() returns msg1, the second returns msg2.
    when(consumer.poll(any())).thenReturn(
            new ConsumerRecords<>(singletonMap(topicPartition, singletonList(
                    new ConsumerRecord<>(topicPartition.topic(), topicPartition.partition(), 0, null, msg1)))),
            new ConsumerRecords<>(singletonMap(topicPartition, singletonList(
                    new ConsumerRecord<>(topicPartition.topic(), topicPartition.partition(), 1, null, msg2)))));
    MetricRegistry registry = new MetricRegistry();
    KafkaStreamConsumer streamConsumer = new KafkaStreamConsumer(
            consumer, topicPartition, chunkDeser, 1,
            new KafkaStreamConsumerMetricsListener(registry, fixedClock), m -> true);
    streamConsumer.poll(Duration.ofMillis(0));
    Gauge<Long> lag = registry.getGauges().get("kafka-stream-consumer-lag");
    Counter offered = registry.getCounters().get("kafka-stream-consumer-triples-offered");
    Counter accumulated = registry.getCounters().get("kafka-stream-consumer-triples-accumulated");
    assertThat(lag.getValue()).isZero();
    assertThat(offered.getCount()).isEqualTo(1);
    assertThat(accumulated.getCount()).isEqualTo(1);
    streamConsumer.acknowledge();
    assertThat(lag.getValue()).isEqualTo(lagEvt1.toMillis());
    streamConsumer.poll(Duration.ofMillis(0));
    assertThat(offered.getCount()).isEqualTo(2);
    assertThat(accumulated.getCount()).isEqualTo(2);
    assertThat(lag.getValue()).isEqualTo(lagEvt1.toMillis());
    streamConsumer.acknowledge();
    assertThat(lag.getValue()).isEqualTo(lagEvt2.toMillis());
}
Also used : Instant(java.time.Instant) MetricRegistry(com.codahale.metrics.MetricRegistry) DiffEventData(org.wikidata.query.rdf.updater.DiffEventData) Duration(java.time.Duration) Clock(java.time.Clock) MutationEventData(org.wikidata.query.rdf.updater.MutationEventData) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) Counter(com.codahale.metrics.Counter) TopicPartition(org.apache.kafka.common.TopicPartition) EventsMeta(org.wikidata.query.rdf.tool.change.events.EventsMeta) RDFDataChunk(org.wikidata.query.rdf.updater.RDFDataChunk) Test(org.junit.Test)
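
The fixed Clock is what makes the lag assertions deterministic: lag is measured from an event's time to "now", and freezing "now" pins the expected gauge values to exactly lagEvt1 and lagEvt2. Below is a minimal sketch of how such a lag gauge can be wired with Dropwizard Metrics and an injected Clock (imports: com.codahale.metrics.Gauge, java.util.concurrent.atomic.AtomicReference); the class and method names are hypothetical, not the listener's actual internals:

class LagGaugeSketch {
    private final AtomicReference<Instant> lastEventTime = new AtomicReference<>();

    LagGaugeSketch(MetricRegistry registry, Clock clock) {
        // Gauge is a single-method interface, so a lambda works; reports 0 until the first ack.
        registry.register("kafka-stream-consumer-lag", (Gauge<Long>) () -> {
            Instant t = lastEventTime.get();
            return t == null ? 0L : Duration.between(t, clock.instant()).toMillis();
        });
    }

    // Hypothetical hook invoked when a batch is acknowledged.
    void onAcknowledge(Instant newestEventTime) {
        lastEventTime.set(newestEventTime);
    }
}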

Example 7 with MutationEventData

Use of org.wikidata.query.rdf.updater.MutationEventData in project wikidata-query-rdf by wikimedia.

From the class KafkaStreamConsumerUnitTest, method test_prefer_reassembled_message:

@Test
public void test_prefer_reassembled_message() {
    int bufferedMessages = 250;
    TopicPartition topicPartition = new TopicPartition("test", 0);
    List<ConsumerRecord<String, MutationEventData>> allRecords = IntStream.range(0, bufferedMessages).mapToObj(i -> {
        EventsMeta meta = new EventsMeta(Instant.EPOCH, UUID.randomUUID().toString(), TEST_DOMAIN, TESTED_STREAM, "unused");
        MutationEventData diff = new DiffEventData(meta, "Q1", 1, Instant.EPOCH, i, bufferedMessages,
                MutationEventData.DIFF_OPERATION,
                new RDFDataChunk("<uri:a> <uri:a> <uri:" + i + "> .\n", RDFFormat.TURTLE.getDefaultMIMEType()),
                null, null, null);
        return new ConsumerRecord<String, MutationEventData>(topicPartition.topic(), topicPartition.partition(), i, null, diff);
    }).collect(toList());
    // Deliver the chunks in two halves, then an empty batch to mark the end of input.
    when(consumer.poll(any())).thenReturn(
            new ConsumerRecords<>(singletonMap(topicPartition, allRecords.subList(0, bufferedMessages / 2))),
            new ConsumerRecords<>(singletonMap(topicPartition, allRecords.subList(bufferedMessages / 2, allRecords.size()))),
            new ConsumerRecords<>(emptyMap()));
    KafkaStreamConsumer streamConsumer = new KafkaStreamConsumer(
            consumer, topicPartition, chunkDeser, 10,
            KafkaStreamConsumerMetricsListener.forRegistry(new MetricRegistry()), m -> true);
    StreamConsumer.Batch b = streamConsumer.poll(Duration.ofMillis(100));
    assertThat(b).isNotNull();
    ConsumerPatch patch = b.getPatch();
    assertThat(patch.getAdded().size()).isEqualTo(bufferedMessages);
    streamConsumer.acknowledge();
    b = streamConsumer.poll(Duration.ofMillis(100));
    assertThat(b).isNull();
}
Also used : Arrays(java.util.Arrays) DiffEventData(org.wikidata.query.rdf.updater.DiffEventData) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) BiFunction(java.util.function.BiFunction) ConsumerRecords(org.apache.kafka.clients.consumer.ConsumerRecords) StatementHelper.statements(org.wikidata.query.rdf.test.StatementHelper.statements) Collections.singletonList(java.util.Collections.singletonList) RDFDataChunk(org.wikidata.query.rdf.updater.RDFDataChunk) RDFFormat(org.openrdf.rio.RDFFormat) MutationEventData(org.wikidata.query.rdf.updater.MutationEventData) Matchers.eq(org.mockito.Matchers.eq) Duration(java.time.Duration) Map(java.util.Map) ConsumerPatch(org.wikidata.query.rdf.tool.rdf.ConsumerPatch) OffsetCommitCallback(org.apache.kafka.clients.consumer.OffsetCommitCallback) RDFChunkSerializer(org.wikidata.query.rdf.updater.RDFChunkSerializer) TopicPartition(org.apache.kafka.common.TopicPartition) RDFParserSuppliers(org.wikidata.query.rdf.tool.rdf.RDFParserSuppliers) Collection(java.util.Collection) Set(java.util.Set) UUID(java.util.UUID) Instant(java.time.Instant) Matchers.any(org.mockito.Matchers.any) List(java.util.List) Stream(java.util.stream.Stream) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) EventsMeta(org.wikidata.query.rdf.tool.change.events.EventsMeta) OffsetAndMetadata(org.apache.kafka.clients.consumer.OffsetAndMetadata) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) IntStream(java.util.stream.IntStream) Statement(org.openrdf.model.Statement) Mock(org.mockito.Mock) RunWith(org.junit.runner.RunWith) RDFParserRegistry(org.openrdf.rio.RDFParserRegistry) ArrayList(java.util.ArrayList) RDFWriterRegistry(org.openrdf.rio.RDFWriterRegistry) HashSet(java.util.HashSet) ArgumentCaptor(org.mockito.ArgumentCaptor) Collections.singletonMap(java.util.Collections.singletonMap) Collections.emptyMap(java.util.Collections.emptyMap) MetricRegistry(com.codahale.metrics.MetricRegistry) Mockito.times(org.mockito.Mockito.times) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) Mockito.verify(org.mockito.Mockito.verify) RDFChunkDeserializer(org.wikidata.query.rdf.updater.RDFChunkDeserializer) Collectors.toList(java.util.stream.Collectors.toList) MockitoJUnitRunner(org.mockito.runners.MockitoJUnitRunner) MutationEventDataGenerator(org.wikidata.query.rdf.updater.MutationEventDataGenerator) Collections(java.util.Collections)
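
These tests lean on Mockito's consecutive stubbing: when(consumer.poll(any())).thenReturn(a, b, c) returns a, b, then c on successive calls, which is how multiple poll batches and a terminating empty batch are simulated on a single mock. The imports above (ArgumentCaptor, OffsetAndMetadata, OffsetCommitCallback, Mockito.verify) suggest the surrounding test class also verifies offset commits; a sketch of that pattern, assuming the code under test commits through consumer.commitAsync(offsets, callback):

@SuppressWarnings("unchecked")
private void verifyCommittedOffset(KafkaConsumer<String, MutationEventData> consumer,
                                   TopicPartition topicPartition, long expectedOffset) {
    // Capture the offsets map handed to the mocked consumer's async commit.
    ArgumentCaptor<Map<TopicPartition, OffsetAndMetadata>> captor =
            ArgumentCaptor.forClass((Class<Map<TopicPartition, OffsetAndMetadata>>) (Class<?>) Map.class);
    verify(consumer).commitAsync(captor.capture(), any(OffsetCommitCallback.class));
    assertThat(captor.getValue().get(topicPartition).offset()).isEqualTo(expectedOffset);
}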

Example 8 with MutationEventData

Use of org.wikidata.query.rdf.updater.MutationEventData in project wikidata-query-rdf by wikimedia.

From the class KafkaStreamConsumerUnitTest, method test_messages_can_be_filtered:

@Test
public void test_messages_can_be_filtered() {
    TopicPartition topicPartition = new TopicPartition("topic", 0);
    MutationEventData event1 = genEvent("Q1", 1, uris("Q1-added-0"), uris(), uris("Q1-shared"), uris(), Instant.EPOCH).get(0);
    MutationEventData event2 = genEvent("L1", 1, uris("L1-added-0"), uris(), uris("L1-shared"), uris(), Instant.EPOCH).get(0);
    // First poll returns the Q1 event, the second the L1 event, then an empty batch.
    when(consumer.poll(any())).thenReturn(
            new ConsumerRecords<>(singletonMap(topicPartition,
                    singletonList(new ConsumerRecord<>(TESTED_STREAM, 0, 1, null, event1)))),
            new ConsumerRecords<>(singletonMap(topicPartition,
                    singletonList(new ConsumerRecord<>(TESTED_STREAM, 0, 2, null, event2)))),
            new ConsumerRecords<>(emptyMap()));
    // The filter keeps only entity ids starting with "L", so event1 (entity Q1) is dropped.
    KafkaStreamConsumer streamConsumer = new KafkaStreamConsumer(
            consumer, topicPartition, chunkDeser, 10,
            KafkaStreamConsumerMetricsListener.forRegistry(new MetricRegistry()),
            m -> m.getEntity().matches("^L.*"));
    StreamConsumer.Batch b = streamConsumer.poll(Duration.ofMillis(100));
    assertThat(b).isNotNull();
    assertThat(b.getPatch().getRemoved()).isEmpty();
    assertThat(b.getPatch().getUnlinkedSharedElements()).isEmpty();
    assertThat(b.getPatch().getAdded()).containsExactlyElementsOf(statements(uris("L1-added-0")));
    assertThat(b.getPatch().getLinkedSharedElements()).containsExactlyElementsOf(statements(uris("L1-shared")));
}
Also used : TopicPartition(org.apache.kafka.common.TopicPartition) MetricRegistry(com.codahale.metrics.MetricRegistry) MutationEventData(org.wikidata.query.rdf.updater.MutationEventData) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) Test(org.junit.Test)
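
The last constructor argument is the event filter, here matching only entity ids that start with "L", so everything produced for Q1 is dropped before accumulation while the lexeme data flows through. Assuming the parameter is a plain java.util.function.Predicate<MutationEventData>, as the lambda shapes in these tests suggest, other policies are one-liners:

// Hypothetical filter variants; only the predicate changes, not the consumer wiring.
Predicate<MutationEventData> acceptAll = m -> true;
Predicate<MutationEventData> itemsOnly = m -> m.getEntity().startsWith("Q");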

Example 9 with MutationEventData

Use of org.wikidata.query.rdf.updater.MutationEventData in project wikidata-query-rdf by wikimedia.

From the class UpdatePatchAccumulatorUnitTest, method test_remove_then_add_or_reconcile_should_not_be_supported_in_the_same_patch:

@Test
public void test_remove_then_add_or_reconcile_should_not_be_supported_in_the_same_patch() {
    PatchAccumulator accumulator = new PatchAccumulator(deserializer);
    accumulateDiff(accumulator, "UNRELATED",
            singletonList(stmt("uri:added-unrelated-entity")), singletonList(stmt("uri:deleted-unrelated-entity")),
            singletonList(stmt("uri:linked-shared")), singletonList(stmt("uri:unlinked-shared")));
    MutationEventData deleteEvent = eventGenerator.deleteEvent(metaGenerator("Q1"), "Q1", 1, Instant.EPOCH).get(0);
    assertThat(accumulator.canAccumulate(deleteEvent)).isTrue();
    accumulator.accumulate(deleteEvent);
    assertThat(accumulator.canAccumulate(deleteEvent))
            .withFailMessage("Deleting the same entity twice should be accepted and be a no-op").isTrue();
    MutationEventData insertEvent = eventGenerator.fullImportEvent(metaGenerator("Q1"), "Q1", 1, Instant.EPOCH,
            singletonList(stmt("uri:added-for-Q1")), singletonList(stmt("uri:linked-shared"))).get(0);
    assertThat(accumulator.canAccumulate(insertEvent))
            .withFailMessage("Re-inserting the same entity after a delete in the same accumulator should not be supported")
            .isFalse();
    assertThatThrownBy(
            () -> accumulateDiff(accumulator, "Q1", singletonList(stmt("uri:added-for-Q1")),
                    singletonList(stmt("uri:linked-shared")), emptyList(), emptyList()),
            "Caller should call canAccumulate and not try to 'force-add' triples for an entity that is already accumulated as 'deleted'")
            .isInstanceOf(IllegalArgumentException.class);
    MutationEventData reconcile = eventGenerator.reconcile(metaGenerator("Q1"), "Q1", 1, Instant.EPOCH,
            singletonList(stmt("uri:added-for-Q1"))).get(0);
    assertThat(accumulator.canAccumulate(reconcile))
            .withFailMessage("Reconciling the same entity after a delete in the same accumulator should not be supported")
            .isFalse();
}
Also used : MutationEventData(org.wikidata.query.rdf.updater.MutationEventData) Test(org.junit.Test)
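
The assertions pin down the accumulator's calling convention: probe canAccumulate before each accumulate, because force-adding triples for an entity already accumulated as deleted throws IllegalArgumentException rather than silently merging. A plausible caller loop under that contract (flush() and newAccumulator() are hypothetical stand-ins for emitting a finished patch and starting a new window):

for (MutationEventData event : events) {
    if (!accumulator.canAccumulate(event)) {
        flush(accumulator);               // hypothetical: ship the patch built so far
        accumulator = newAccumulator();   // hypothetical: open a fresh accumulation window
    }
    accumulator.accumulate(event);
}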

Example 10 with MutationEventData

Use of org.wikidata.query.rdf.updater.MutationEventData in project wikidata-query-rdf by wikimedia.

From the class PatchAccumulator, method accumulate:

public void accumulate(List<MutationEventData> sequence) {
    checkPositionIndex(0, sequence.size(), "Received empty sequence");
    // Every chunk in the sequence belongs to the same mutation; the head drives dispatch.
    MutationEventData head = sequence.get(0);
    checkArgument(canAccumulate(head), "Cannot accumulate data for entity: " + head.getEntity());
    switch (head.getOperation()) {
        case DELETE_OPERATION:
            checkArgument(sequence.size() == 1, "Inconsistent delete mutation (" + sequence.size() + " chunks)");
            accumulateDelete(head);
            break;
        case IMPORT_OPERATION:
        case DIFF_OPERATION:
            checkArgument(head instanceof DiffEventData, "Unsupported MutationEventData of type " + head.getOperation());
            accumulateDiff(sequence);
            break;
        case RECONCILE_OPERATION:
            checkArgument(head instanceof DiffEventData, "Unsupported MutationEventData of type " + head.getOperation());
            accumulateReconciliation(sequence);
            break;
        default:
            throw new UnsupportedOperationException("Unsupported operation [" + head.getOperation() + "]");
    }
}
Also used : DiffEventData(org.wikidata.query.rdf.updater.DiffEventData) MutationEventData(org.wikidata.query.rdf.updater.MutationEventData)
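
accumulate dispatches on the head event's operation: a delete must arrive as exactly one chunk, while import, diff, and reconcile mutations arrive as a list of DiffEventData chunks. For illustration, feeding it a whole chunked import, with the generator call shape borrowed from the test above (variable names are illustrative):

// All chunks of one mutation are passed together, so the size/operation checks can run.
List<MutationEventData> chunks = eventGenerator.fullImportEvent(
        metaGenerator("Q1"), "Q1", 1, Instant.EPOCH, addedStatements, linkedSharedStatements);
if (accumulator.canAccumulate(chunks.get(0))) {
    accumulator.accumulate(chunks);
}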

Aggregations

MutationEventData (org.wikidata.query.rdf.updater.MutationEventData): 10
Test (org.junit.Test): 6
TopicPartition (org.apache.kafka.common.TopicPartition): 5
DiffEventData (org.wikidata.query.rdf.updater.DiffEventData): 5
MetricRegistry (com.codahale.metrics.MetricRegistry): 4
ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord): 4
ArrayList (java.util.ArrayList): 3
Statement (org.openrdf.model.Statement): 3
ConsumerPatch (org.wikidata.query.rdf.tool.rdf.ConsumerPatch): 3
Duration (java.time.Duration): 2
Instant (java.time.Instant): 2
Collection (java.util.Collection): 2
Collections.singletonList (java.util.Collections.singletonList): 2
HashMap (java.util.HashMap): 2
HashSet (java.util.HashSet): 2
List (java.util.List): 2
Map (java.util.Map): 2
Set (java.util.Set): 2
Collectors.toList (java.util.stream.Collectors.toList): 2
KafkaConsumer (org.apache.kafka.clients.consumer.KafkaConsumer): 2