
Example 1 with DiffEventData

Use of org.wikidata.query.rdf.updater.DiffEventData in project wikidata-query-rdf by wikimedia.

In the class PatchAccumulator, the method accumulateReconciliation:

private void accumulateReconciliation(List<MutationEventData> sequence) {
    checkArgument(!sequence.isEmpty(), "Received empty sequence");
    MutationEventData head = sequence.get(0);
    // Every chunk must share the head's entity, requestId and operation;
    // surface the first chunk that does not.
    Optional<MutationEventData> inconsistentBlock = sequence.stream().filter(m -> {
        if (!head.getEntity().equals(m.getEntity())) {
            return true;
        } else if (!m.getMeta().requestId().equals(head.getMeta().requestId())) {
            return true;
        } else {
            return !head.getOperation().equals(m.getOperation());
        }
    }).findFirst();
    if (inconsistentBlock.isPresent()) {
        throw new IllegalArgumentException("Inconsistent sequence of events: " + inconsistentBlock.get() + " does not belong to " + head);
    }
    List<Statement> allStmts = sequence.stream()
            .map(DiffEventData.class::cast)
            .map(DiffEventData::getRdfAddedData)
            .flatMap(c -> deserChunk(c).stream())
            .collect(toList());
    reconciliations.put(head.getEntity(), allStmts);
    // Drop any patch data accumulated for this entity: reconciling it resets all of that anyway.
    removeDataFromEntity(head.getEntity());
    totalAccumulated += allStmts.size();
}
Also used : Statement(org.openrdf.model.Statement) Collections.unmodifiableList(java.util.Collections.unmodifiableList) Getter(lombok.Getter) DiffEventData(org.wikidata.query.rdf.updater.DiffEventData) HashMap(java.util.HashMap) Collections.singletonList(java.util.Collections.singletonList) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) RDFDataChunk(org.wikidata.query.rdf.updater.RDFDataChunk) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Collectors.toMap(java.util.stream.Collectors.toMap) RECONCILE_OPERATION(org.wikidata.query.rdf.updater.MutationEventData.RECONCILE_OPERATION) MutationEventData(org.wikidata.query.rdf.updater.MutationEventData) Map(java.util.Map) ConsumerPatch(org.wikidata.query.rdf.tool.rdf.ConsumerPatch) DIFF_OPERATION(org.wikidata.query.rdf.updater.MutationEventData.DIFF_OPERATION) Preconditions.checkPositionIndex(com.google.common.base.Preconditions.checkPositionIndex) Collection(java.util.Collection) Set(java.util.Set) SiteLinksReclassification(org.wikidata.query.rdf.tool.rdf.SiteLinksReclassification) Patch(org.wikidata.query.rdf.tool.rdf.Patch) Sets(com.google.common.collect.Sets) RDFChunkDeserializer(org.wikidata.query.rdf.updater.RDFChunkDeserializer) Collectors.toList(java.util.stream.Collectors.toList) List(java.util.List) IMPORT_OPERATION(org.wikidata.query.rdf.updater.MutationEventData.IMPORT_OPERATION) Function.identity(java.util.function.Function.identity) Collections.unmodifiableMap(java.util.Collections.unmodifiableMap) Optional(java.util.Optional) DELETE_OPERATION(org.wikidata.query.rdf.updater.MutationEventData.DELETE_OPERATION) NotThreadSafe(javax.annotation.concurrent.NotThreadSafe)
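The core pattern here is worth noting: scan the sequence for the first element that disagrees with its head, then fail with that element in the error message. A minimal, self-contained sketch of the same check (the Event record is hypothetical, introduced only for illustration):

import java.util.List;
import java.util.Optional;

public final class SequenceChecks {

    // Hypothetical stand-in for MutationEventData: entity, request id and operation.
    record Event(String entity, String requestId, String operation) { }

    // Returns the first event that does not share entity, requestId and operation
    // with the head of the sequence, mirroring the check in accumulateReconciliation.
    static Optional<Event> firstInconsistent(List<Event> sequence) {
        Event head = sequence.get(0);
        return sequence.stream()
                .filter(e -> !head.entity().equals(e.entity())
                        || !head.requestId().equals(e.requestId())
                        || !head.operation().equals(e.operation()))
                .findFirst();
    }

    public static void main(String[] args) {
        List<Event> seq = List.of(
                new Event("Q42", "req-1", "diff"),
                new Event("Q42", "req-1", "diff"),
                new Event("Q43", "req-1", "diff")); // different entity: inconsistent
        firstInconsistent(seq).ifPresent(e -> System.out.println("Does not belong: " + e));
    }
}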

Example 2 with DiffEventData

Use of org.wikidata.query.rdf.updater.DiffEventData in project wikidata-query-rdf by wikimedia.

In the class PatchAccumulator, the method accumulateDiff:

private void accumulateDiff(List<MutationEventData> sequence) {
    MutationEventData head = sequence.get(0);
    List<Statement> added = new ArrayList<>();
    List<Statement> removed = new ArrayList<>();
    List<Statement> linkedShared = new ArrayList<>();
    List<Statement> unlinkedShared = new ArrayList<>();
    for (MutationEventData data : sequence) {
        if (!head.getClass().equals(data.getClass())) {
            throw new IllegalArgumentException("Inconsistent chunks provided, head class " + head.getClass() + " does not match " + data.getClass());
        }
        if (!head.getMeta().requestId().equals(data.getMeta().requestId())) {
            throw new IllegalArgumentException("Inconsistent chunks provided, head requestId " + head.getMeta().requestId() + " does not match " + data.getMeta().requestId());
        }
        DiffEventData diff = (DiffEventData) data;
        if (diff.getRdfAddedData() != null) {
            added.addAll(deserChunk(diff.getRdfAddedData()));
        }
        if (diff.getRdfDeletedData() != null) {
            removed.addAll(deserChunk(diff.getRdfDeletedData()));
        }
        if (diff.getRdfLinkedSharedData() != null) {
            linkedShared.addAll(deserChunk(diff.getRdfLinkedSharedData()));
        }
        if (diff.getRdfUnlinkedSharedData() != null) {
            unlinkedShared.addAll(deserChunk(diff.getRdfUnlinkedSharedData()));
        }
    }
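    // Note the Patch constructor argument order: (added, linkedShared, removed, unlinkedShared);
    // removed comes third, between the two shared-statement lists.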
    Patch patch = SiteLinksReclassification.reclassify(new Patch(added, linkedShared, removed, unlinkedShared));
    accumulate(head.getEntity(), patch.getAdded(), patch.getRemoved(), patch.getLinkedSharedElements(), patch.getUnlinkedSharedElements());
}
Also used : Statement(org.openrdf.model.Statement) ArrayList(java.util.ArrayList) DiffEventData(org.wikidata.query.rdf.updater.DiffEventData) ConsumerPatch(org.wikidata.query.rdf.tool.rdf.ConsumerPatch) Patch(org.wikidata.query.rdf.tool.rdf.Patch) MutationEventData(org.wikidata.query.rdf.updater.MutationEventData)
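Each of the four if-blocks above repeats the same "append this chunk's triples if the chunk is present" step. A small sketch of how that guard can be factored into a helper (the Chunk record is a simplified stand-in, not the project's RDFDataChunk):

import java.util.ArrayList;
import java.util.List;

final class ChunkMerging {

    // Simplified stand-in for an optional RDF chunk carrying deserialized triples.
    record Chunk(List<String> triples) { }

    // Appends the chunk's triples to the target list; absent (null) chunks are skipped.
    static void appendChunk(Chunk chunk, List<String> target) {
        if (chunk != null) {
            target.addAll(chunk.triples());
        }
    }

    public static void main(String[] args) {
        List<String> added = new ArrayList<>();
        appendChunk(new Chunk(List.of("<uri:a> <uri:a> <uri:a> .")), added);
        appendChunk(null, added); // a missing category is simply ignored
        System.out.println(added); // [<uri:a> <uri:a> <uri:a> .]
    }
}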

Example 3 with DiffEventData

Use of org.wikidata.query.rdf.updater.DiffEventData in project wikidata-query-rdf by wikimedia.

In the class KafkaStreamConsumerMetricsListenerUnitTest, the method test_metrics_are_reported:

@Test
public void test_metrics_are_reported() {
    Instant now = Instant.now();
    Clock fixedClock = Clock.fixed(now, ZoneOffset.UTC);
    Duration lagEvt1 = Duration.ofHours(2);
    Duration lagEvt2 = Duration.ofHours(1);
    Instant evTime1 = now.minus(lagEvt1);
    Instant evTime2 = now.minus(lagEvt2);
    MutationEventData msg1 = new DiffEventData(
            new EventsMeta(Instant.now(), "unused", "domain", "stream", "req"),
            "Q0", 1, evTime1, 0, 1, MutationEventData.IMPORT_OPERATION,
            new RDFDataChunk("\n<uri:a> <uri:a> <uri:a> .\n", RDFFormat.TURTLE.getDefaultMIMEType()),
            null, null, null);
    MutationEventData msg2 = new DiffEventData(
            new EventsMeta(Instant.now(), "unused", "domain", "stream", "req"),
            "Q0", 2, evTime2, 0, 1, MutationEventData.IMPORT_OPERATION,
            new RDFDataChunk("\n<uri:b> <uri:b> <uri:b> .\n", RDFFormat.TURTLE.getDefaultMIMEType()),
            null, null, null);
    TopicPartition topicPartition = new TopicPartition("topic", 0);
    when(consumer.poll(any())).thenReturn(
            new ConsumerRecords<>(singletonMap(topicPartition,
                    singletonList(new ConsumerRecord<>(topicPartition.topic(), topicPartition.partition(), 0, null, msg1)))),
            new ConsumerRecords<>(singletonMap(topicPartition,
                    singletonList(new ConsumerRecord<>(topicPartition.topic(), topicPartition.partition(), 1, null, msg2)))));
    MetricRegistry registry = new MetricRegistry();
    KafkaStreamConsumer streamConsumer = new KafkaStreamConsumer(consumer, topicPartition, chunkDeser, 1, new KafkaStreamConsumerMetricsListener(registry, fixedClock), m -> true);
    streamConsumer.poll(Duration.ofMillis(0));
    Gauge<Long> lag = registry.getGauges().get("kafka-stream-consumer-lag");
    Counter offered = registry.getCounters().get("kafka-stream-consumer-triples-offered");
    Counter accumulated = registry.getCounters().get("kafka-stream-consumer-triples-accumulated");
    assertThat(lag.getValue()).isZero();
    assertThat(offered.getCount()).isEqualTo(1);
    assertThat(accumulated.getCount()).isEqualTo(1);
    streamConsumer.acknowledge();
    assertThat(lag.getValue()).isEqualTo(lagEvt1.toMillis());
    streamConsumer.poll(Duration.ofMillis(0));
    assertThat(offered.getCount()).isEqualTo(2);
    assertThat(accumulated.getCount()).isEqualTo(2);
    assertThat(lag.getValue()).isEqualTo(lagEvt1.toMillis());
    streamConsumer.acknowledge();
    assertThat(lag.getValue()).isEqualTo(lagEvt2.toMillis());
}
Also used : Instant(java.time.Instant) MetricRegistry(com.codahale.metrics.MetricRegistry) DiffEventData(org.wikidata.query.rdf.updater.DiffEventData) Duration(java.time.Duration) Clock(java.time.Clock) MutationEventData(org.wikidata.query.rdf.updater.MutationEventData) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) Counter(com.codahale.metrics.Counter) TopicPartition(org.apache.kafka.common.TopicPartition) EventsMeta(org.wikidata.query.rdf.tool.change.events.EventsMeta) RDFDataChunk(org.wikidata.query.rdf.updater.RDFDataChunk) Test(org.junit.Test)
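The test hinges on a lag gauge that compares the event time of the last acknowledged message with a Clock, which is fixed so the assertions are deterministic. A minimal sketch of that arrangement with Dropwizard Metrics (the field and class names here are assumptions, not necessarily those of KafkaStreamConsumerMetricsListener):

import com.codahale.metrics.Gauge;
import com.codahale.metrics.MetricRegistry;
import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.time.ZoneOffset;

final class LagGaugeSketch {
    private final Clock clock;
    // Event time of the most recently acknowledged message; EPOCH until the first ack.
    private volatile Instant lastAckedEventTime = Instant.EPOCH;

    LagGaugeSketch(MetricRegistry registry, Clock clock) {
        this.clock = clock;
        // Lag in milliseconds between "now" and the last acknowledged event.
        registry.register("kafka-stream-consumer-lag",
                (Gauge<Long>) () -> lastAckedEventTime.equals(Instant.EPOCH)
                        ? 0L
                        : Duration.between(lastAckedEventTime, clock.instant()).toMillis());
    }

    void acknowledge(Instant eventTime) {
        lastAckedEventTime = eventTime;
    }

    public static void main(String[] args) {
        Instant now = Instant.now();
        MetricRegistry registry = new MetricRegistry();
        LagGaugeSketch sketch = new LagGaugeSketch(registry, Clock.fixed(now, ZoneOffset.UTC));
        sketch.acknowledge(now.minus(Duration.ofHours(2)));
        // With a fixed clock the gauge is deterministic: prints 7200000.
        System.out.println(registry.getGauges().get("kafka-stream-consumer-lag").getValue());
    }
}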

Example 4 with DiffEventData

Use of org.wikidata.query.rdf.updater.DiffEventData in project wikidata-query-rdf by wikimedia.

In the class KafkaStreamConsumerUnitTest, the method test_prefer_reassembled_message:

@Test
public void test_prefer_reassembled_message() {
    int bufferedMessages = 250;
    TopicPartition topicPartition = new TopicPartition("test", 0);
    List<ConsumerRecord<String, MutationEventData>> allRecords = IntStream.range(0, bufferedMessages).mapToObj(i -> {
        EventsMeta meta = new EventsMeta(Instant.EPOCH, UUID.randomUUID().toString(), TEST_DOMAIN, TESTED_STREAM, "unused");
        MutationEventData diff = new DiffEventData(meta, "Q1", 1, Instant.EPOCH, i, bufferedMessages,
                MutationEventData.DIFF_OPERATION,
                new RDFDataChunk("<uri:a> <uri:a> <uri:" + i + "> .\n", RDFFormat.TURTLE.getDefaultMIMEType()),
                null, null, null);
        return new ConsumerRecord<String, MutationEventData>(topicPartition.topic(), topicPartition.partition(), i, null, diff);
    }).collect(toList());
    when(consumer.poll(any())).thenReturn(
            new ConsumerRecords<>(singletonMap(topicPartition, allRecords.subList(0, bufferedMessages / 2))),
            new ConsumerRecords<>(singletonMap(topicPartition, allRecords.subList(bufferedMessages / 2, allRecords.size()))),
            new ConsumerRecords<>(emptyMap()));
    KafkaStreamConsumer streamConsumer = new KafkaStreamConsumer(consumer, topicPartition, chunkDeser, 10,
            KafkaStreamConsumerMetricsListener.forRegistry(new MetricRegistry()), m -> true);
    StreamConsumer.Batch b = streamConsumer.poll(Duration.ofMillis(100));
    assertThat(b).isNotNull();
    ConsumerPatch patch = b.getPatch();
    assertThat(patch.getAdded().size()).isEqualTo(bufferedMessages);
    streamConsumer.acknowledge();
    b = streamConsumer.poll(Duration.ofMillis(100));
    assertThat(b).isNull();
}
Also used : Arrays(java.util.Arrays) DiffEventData(org.wikidata.query.rdf.updater.DiffEventData) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) BiFunction(java.util.function.BiFunction) ConsumerRecords(org.apache.kafka.clients.consumer.ConsumerRecords) StatementHelper.statements(org.wikidata.query.rdf.test.StatementHelper.statements) Collections.singletonList(java.util.Collections.singletonList) RDFDataChunk(org.wikidata.query.rdf.updater.RDFDataChunk) RDFFormat(org.openrdf.rio.RDFFormat) MutationEventData(org.wikidata.query.rdf.updater.MutationEventData) Matchers.eq(org.mockito.Matchers.eq) Duration(java.time.Duration) Map(java.util.Map) ConsumerPatch(org.wikidata.query.rdf.tool.rdf.ConsumerPatch) OffsetCommitCallback(org.apache.kafka.clients.consumer.OffsetCommitCallback) RDFChunkSerializer(org.wikidata.query.rdf.updater.RDFChunkSerializer) TopicPartition(org.apache.kafka.common.TopicPartition) RDFParserSuppliers(org.wikidata.query.rdf.tool.rdf.RDFParserSuppliers) Collection(java.util.Collection) Set(java.util.Set) UUID(java.util.UUID) Instant(java.time.Instant) Matchers.any(org.mockito.Matchers.any) List(java.util.List) Stream(java.util.stream.Stream) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) EventsMeta(org.wikidata.query.rdf.tool.change.events.EventsMeta) OffsetAndMetadata(org.apache.kafka.clients.consumer.OffsetAndMetadata) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) IntStream(java.util.stream.IntStream) Statement(org.openrdf.model.Statement) Mock(org.mockito.Mock) RunWith(org.junit.runner.RunWith) RDFParserRegistry(org.openrdf.rio.RDFParserRegistry) ArrayList(java.util.ArrayList) RDFWriterRegistry(org.openrdf.rio.RDFWriterRegistry) HashSet(java.util.HashSet) ArgumentCaptor(org.mockito.ArgumentCaptor) Collections.singletonMap(java.util.Collections.singletonMap) Collections.emptyMap(java.util.Collections.emptyMap) MetricRegistry(com.codahale.metrics.MetricRegistry) Mockito.times(org.mockito.Mockito.times) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) Mockito.verify(org.mockito.Mockito.verify) RDFChunkDeserializer(org.wikidata.query.rdf.updater.RDFChunkDeserializer) Collectors.toList(java.util.stream.Collectors.toList) MockitoJUnitRunner(org.mockito.runners.MockitoJUnitRunner) MutationEventDataGenerator(org.wikidata.query.rdf.updater.MutationEventDataGenerator) Collections(java.util.Collections)
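What the test checks is chunk reassembly: one logical diff arrives as 250 chunks (sequence i of bufferedMessages), and the consumer must expose it as a single patch rather than 250 partial ones. A simplified sketch of such reassembly logic, with a hypothetical Chunk record standing in for MutationEventData:

import java.util.ArrayList;
import java.util.List;
import java.util.Optional;

final class ChunkReassembly {

    // Hypothetical chunk: position within the logical message and total chunk count.
    record Chunk(String entity, int sequence, int chunkCount, String payload) { }

    private final List<Chunk> buffer = new ArrayList<>();

    // Buffers the chunk; returns the concatenated payload once all chunks
    // of the logical message have been seen.
    Optional<String> offer(Chunk chunk) {
        buffer.add(chunk);
        if (buffer.size() < chunk.chunkCount()) {
            return Optional.empty(); // message still incomplete
        }
        StringBuilder sb = new StringBuilder();
        buffer.forEach(c -> sb.append(c.payload()));
        buffer.clear();
        return Optional.of(sb.toString());
    }

    public static void main(String[] args) {
        ChunkReassembly r = new ChunkReassembly();
        for (int i = 0; i < 3; i++) {
            r.offer(new Chunk("Q1", i, 3, "<uri:a> <uri:a> <uri:" + i + "> .\n"))
                    .ifPresent(System.out::print); // prints once, after the last chunk
        }
    }
}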

Example 5 with DiffEventData

Use of org.wikidata.query.rdf.updater.DiffEventData in project wikidata-query-rdf by wikimedia.

In the class PatchAccumulator, the method accumulate:

public void accumulate(List<MutationEventData> sequence) {
    checkArgument(!sequence.isEmpty(), "Received empty sequence");
    MutationEventData head = sequence.get(0);
    checkArgument(canAccumulate(head), "Cannot accumulate data for entity: " + head.getEntity());
    switch(head.getOperation()) {
        case DELETE_OPERATION:
            checkArgument(sequence.size() == 1, "Inconsistent delete mutation (" + sequence.size() + " chunks)");
            accumulateDelete(head);
            break;
        case IMPORT_OPERATION:
        case DIFF_OPERATION:
            checkArgument(head instanceof DiffEventData, "Unsupported MutationEventData of type " + head.getOperation());
            accumulateDiff(sequence);
            break;
        case RECONCILE_OPERATION:
            checkArgument(head instanceof DiffEventData, "Unsupported MutationEventData of type " + head.getOperation());
            accumulateReconciliation(sequence);
            break;
        default:
            throw new UnsupportedOperationException("Unsupported operation [" + head.getOperation() + "]");
    }
}
Also used : DiffEventData(org.wikidata.query.rdf.updater.DiffEventData) MutationEventData(org.wikidata.query.rdf.updater.MutationEventData)
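For callers, the contract is: pass all chunks of exactly one logical mutation per call, and the switch on the head's operation routes them to the delete, diff/import, or reconcile path. A hedged usage sketch, reusing the DiffEventData constructor shape from the tests above (the PatchAccumulator construction is omitted here, since it needs an RDFChunkDeserializer wired up):

// Assumes an already constructed accumulator: PatchAccumulator accumulator = ...;
EventsMeta meta = new EventsMeta(Instant.now(), "unused", "domain", "stream", "req");
MutationEventData importEvent = new DiffEventData(
        meta, "Q0", 1, Instant.now(), 0, 1, MutationEventData.IMPORT_OPERATION,
        new RDFDataChunk("<uri:a> <uri:a> <uri:a> .\n", RDFFormat.TURTLE.getDefaultMIMEType()),
        null, null, null);
// A single-chunk import: dispatched to accumulateDiff(sequence).
accumulator.accumulate(singletonList(importEvent));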

Aggregations

DiffEventData (org.wikidata.query.rdf.updater.DiffEventData): 5 uses
MutationEventData (org.wikidata.query.rdf.updater.MutationEventData): 5 uses
ArrayList (java.util.ArrayList): 3 uses
Statement (org.openrdf.model.Statement): 3 uses
MetricRegistry (com.codahale.metrics.MetricRegistry): 2 uses
Duration (java.time.Duration): 2 uses
Instant (java.time.Instant): 2 uses
Collection (java.util.Collection): 2 uses
Collections.singletonList (java.util.Collections.singletonList): 2 uses
HashSet (java.util.HashSet): 2 uses
List (java.util.List): 2 uses
Map (java.util.Map): 2 uses
Set (java.util.Set): 2 uses
Collectors.toList (java.util.stream.Collectors.toList): 2 uses
ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord): 2 uses
TopicPartition (org.apache.kafka.common.TopicPartition): 2 uses
Test (org.junit.Test): 2 uses
ConsumerPatch (org.wikidata.query.rdf.tool.rdf.ConsumerPatch): 2 uses
Patch (org.wikidata.query.rdf.tool.rdf.Patch): 2 uses
RDFDataChunk (org.wikidata.query.rdf.updater.RDFDataChunk): 2 uses