Search in sources :

Example 1 with ConsumerPatch

use of org.wikidata.query.rdf.tool.rdf.ConsumerPatch in project wikidata-query-rdf by wikimedia.

the class KafkaStreamConsumerUnitTest method test_poll_accumulates_records_into_a_rdfpatch.

/**
 * Feeds the mocked Kafka consumer two non-empty polls followed by an empty one,
 * drains the stream consumer until it returns {@code null}, applies every patch
 * to the shared test collections and then verifies the accumulated data, the
 * average event time of each batch and the start/end markers of the two
 * expected batches.
 */
@Test
public void test_poll_accumulates_records_into_a_rdfpatch() {
    Instant evtTimeBase = Instant.EPOCH;
    Duration evtTimeIncrement = Duration.ofMinutes(2);
    TopicPartition partition = new TopicPartition("test", 0);
    // NOTE(review): an earlier comment here described a preferredBatchLength of 4
    // (first batch ends when reading event n°4, so events 5 and 6 do not rely on
    // timeouts), but the value passed is 8 — confirm what unit this argument counts.
    KafkaStreamConsumer streamConsumer = new KafkaStreamConsumer(
            consumer,
            partition,
            chunkDeser,
            8,
            KafkaStreamConsumerMetricsListener.forRegistry(new MetricRegistry()),
            m -> true);
    List<ConsumerRecord<String, MutationEventData>> records = recordsList(evtTimeBase, evtTimeIncrement);

    // First poll: records 0 and 1; second poll: all remaining records; third poll: nothing.
    when(consumer.poll(any())).thenReturn(
            new ConsumerRecords<>(singletonMap(partition, records.subList(0, 2))),
            new ConsumerRecords<>(singletonMap(partition, records.subList(2, records.size()))),
            new ConsumerRecords<>(emptyMap()));

    List<Instant> eventTimes = new ArrayList<>();
    List<StreamConsumer.Batch> batches = new ArrayList<>();
    Duration pollTimeout = Duration.ofMillis(1000);
    // A null batch signals that the stream consumer has nothing left to hand out.
    for (StreamConsumer.Batch batch = streamConsumer.poll(pollTimeout);
            batch != null;
            batch = streamConsumer.poll(pollTimeout)) {
        batches.add(batch);
        ConsumerPatch patch = batch.getPatch();
        eventTimes.add(batch.getAverageEventTime());
        // Apply the patch in the same order as before: add, remove, then linked shared elements.
        storedData.addAll(patch.getAdded());
        storedData.removeAll(patch.getRemoved());
        storedData.addAll(patch.getLinkedSharedElements());
        unlinkedSharedElts.addAll(patch.getUnlinkedSharedElements());
        deletedEntityIds.addAll(patch.getEntityIdsToDelete());
        streamConsumer.acknowledge();
    }
    streamConsumer.close();

    // Average of the event times at minutes 2, 4, 6, 8 and at minutes 10, 12.
    Instant firstBatchAverage = Instant.EPOCH.plus(Duration.ofMinutes((2 + 2 * 2 + 2 * 3 + 2 * 4) / 4));
    Instant secondBatchAverage = Instant.EPOCH.plus(Duration.ofMinutes((2 * 5 + 2 * 6) / 2));
    assertThat(eventTimes).containsExactly(firstBatchAverage, secondBatchAverage);

    assertThat(storedData).containsExactlyInAnyOrderElementsOf(expectedData);

    // Either one or two unlinked shared elements are acceptable; each count has its own expectation.
    assertThat(unlinkedSharedElts.size()).isBetween(1, 2);
    if (unlinkedSharedElts.size() != 1) {
        assertThat(unlinkedSharedElts).containsExactlyInAnyOrderElementsOf(expectedUnlinkedEltsSingleOptimizedBatch);
    } else {
        assertThat(unlinkedSharedElts).containsExactlyInAnyOrderElementsOf(expectedUnlinkedElts);
    }

    assertThat(deletedEntityIds).containsOnly("Q2");

    // Batch boundaries: message ids first, then the corresponding event timestamps.
    assertThat(batches.stream().map(StreamConsumer.Batch::getBatchStartMsgId))
            .containsExactly("IMP-Q1-1", "IMP-Q2-1");
    assertThat(batches.stream().map(StreamConsumer.Batch::getBatchEndMsgId))
            .containsExactly("DIF-Q1-4", "DEL-Q2-2");
    assertThat(batches.stream().map(StreamConsumer.Batch::getBatchStartDt))
            .containsExactly(
                    evtTimeBase.plus(evtTimeIncrement.multipliedBy(1)),
                    evtTimeBase.plus(evtTimeIncrement.multipliedBy(5)));
    assertThat(batches.stream().map(StreamConsumer.Batch::getBatchEndDt))
            .containsExactly(
                    evtTimeBase.plus(evtTimeIncrement.multipliedBy(4)),
                    evtTimeBase.plus(evtTimeIncrement.multipliedBy(6)));
}
Also used : MetricRegistry(com.codahale.metrics.MetricRegistry) Instant(java.time.Instant) ArrayList(java.util.ArrayList) Duration(java.time.Duration) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) TopicPartition(org.apache.kafka.common.TopicPartition) ConsumerPatch(org.wikidata.query.rdf.tool.rdf.ConsumerPatch) Test(org.junit.Test)

Example 2 with ConsumerPatch

use of org.wikidata.query.rdf.tool.rdf.ConsumerPatch in project wikidata-query-rdf by wikimedia.

the class KafkaStreamConsumerUnitTest method test_poll_accumulates_records_with_delete_into_a_rdfpatch.

/**
 * Serves every record of {@code recordListWithDeleteAndAdd()} in a single poll,
 * drains the stream consumer until it returns {@code null} and checks that the
 * statements left after applying each patch, and the deleted entity ids, match
 * the expectations.
 */
@Test
public void test_poll_accumulates_records_with_delete_into_a_rdfpatch() {
    TopicPartition partition = new TopicPartition("test", 0);
    KafkaStreamConsumer streamConsumer = new KafkaStreamConsumer(
            consumer,
            partition,
            chunkDeser,
            Integer.MAX_VALUE,
            KafkaStreamConsumerMetricsListener.forRegistry(new MetricRegistry()),
            m -> true);
    List<ConsumerRecord<String, MutationEventData>> records = recordListWithDeleteAndAdd();

    // One poll carrying everything, then an empty poll.
    when(consumer.poll(any())).thenReturn(
            new ConsumerRecords<>(singletonMap(partition, records)),
            new ConsumerRecords<>(emptyMap()));

    Set<Statement> storedStatements = new HashSet<>();
    Set<String> deletedEntities = new HashSet<>();
    Set<Statement> expectedStatements = new HashSet<>(statements(uris("Q1-added-0", "Q1-shared")));

    Duration pollTimeout = Duration.ofMillis(100);
    // A null batch signals that the stream consumer has nothing left to hand out.
    for (StreamConsumer.Batch batch = streamConsumer.poll(pollTimeout);
            batch != null;
            batch = streamConsumer.poll(pollTimeout)) {
        ConsumerPatch patch = batch.getPatch();
        // Apply the patch in the same order as before: add, remove, then linked shared elements.
        storedStatements.addAll(patch.getAdded());
        storedStatements.removeAll(patch.getRemoved());
        storedStatements.addAll(patch.getLinkedSharedElements());
        deletedEntities.addAll(patch.getEntityIdsToDelete());
        streamConsumer.acknowledge();
    }
    streamConsumer.close();

    assertThat(storedStatements).containsExactlyInAnyOrderElementsOf(expectedStatements);
    assertThat(deletedEntities).containsOnly("Q1");
}
Also used : Statement(org.openrdf.model.Statement) MetricRegistry(com.codahale.metrics.MetricRegistry) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) TopicPartition(org.apache.kafka.common.TopicPartition) HashSet(java.util.HashSet) ConsumerPatch(org.wikidata.query.rdf.tool.rdf.ConsumerPatch) Test(org.junit.Test)

Example 3 with ConsumerPatch

use of org.wikidata.query.rdf.tool.rdf.ConsumerPatch in project wikidata-query-rdf by wikimedia.

the class UpdatePatchAccumulatorUnitTest method test_add_then_remove_must_not_prune_unrelated_shared_triples.

/**
 * Accumulates a diff for Q1 and a diff for Q2 that have some shared statements
 * in common, then a delete of Q1, and checks which linked and unlinked shared
 * statements are present in the resulting patch.
 */
@Test
public void test_add_then_remove_must_not_prune_unrelated_shared_triples() {
    PatchAccumulator accumulator = new PatchAccumulator(deserializer);

    // Diff for Q1: one added, one deleted, two linked shared, two unlinked shared.
    accumulateDiff(
            accumulator,
            "Q1",
            singletonList(stmt("uri:added-Q1")),
            singletonList(stmt("uri:deleted-Q1")),
            asList(stmt("uri:linked-shared"), stmt("uri:linked-shared-only-for-Q1")),
            asList(stmt("uri:unlinked-shared"), stmt("uri:unlinked-shared-only-for-Q1")));
    // Diff for Q2: same shape, sharing "uri:linked-shared" and "uri:unlinked-shared" with Q1.
    accumulateDiff(
            accumulator,
            "Q2",
            singletonList(stmt("uri:added-Q2")),
            singletonList(stmt("uri:deleted-Q2")),
            asList(stmt("uri:linked-shared"), stmt("uri:linked-shared-only-for-Q2")),
            asList(stmt("uri:unlinked-shared"), stmt("uri:unlinked-shared-only-for-Q2")));
    accumulateDelete(accumulator, "Q1");

    ConsumerPatch patch = accumulator.asPatch();

    assertThat(patch.getLinkedSharedElements())
            .containsExactlyInAnyOrder(stmt("uri:linked-shared"), stmt("uri:linked-shared-only-for-Q2"));
    // Only a "contains" check: keeping or dropping uri:unlinked-shared-only-for-Q1 is acceptable here.
    assertThat(patch.getUnlinkedSharedElements())
            .contains(stmt("uri:unlinked-shared"), stmt("uri:unlinked-shared-only-for-Q2"));
}
Also used : ConsumerPatch(org.wikidata.query.rdf.tool.rdf.ConsumerPatch) Test(org.junit.Test)

Example 4 with ConsumerPatch

use of org.wikidata.query.rdf.tool.rdf.ConsumerPatch in project wikidata-query-rdf by wikimedia.

the class UpdatePatchAccumulatorUnitTest method test_leak_data_from_accumulator.

/**
 * Accumulates a first diff event for Q1 and snapshots the resulting patch, then
 * checks that accumulating a second diff for Q2 throws an
 * {@link IllegalArgumentException} and that the patch produced afterwards is
 * equal to the snapshot taken before the failed accumulation.
 */
@Test
public void test_leak_data_from_accumulator() {
    MutationEventDataGenerator generator = new MutationEventDataGenerator(
            serializer,
            RDFFormat.TURTLE.getDefaultMIMEType(),
            300);
    PatchAccumulator accumulator = new PatchAccumulator(deserializer);

    List<MutationEventData> firstEvents = generator.diffEvent(
            metaGenerator("Q1"),
            "Q1",
            1,
            Instant.EPOCH,
            singletonList(stmt("uri:added-Q1")),
            singletonList(stmt("uri:deleted-Q1")),
            asList(stmt("uri:linked-shared"), stmt("uri:")),
            singletonList(stmt("uri:unlinked-shared")));
    for (MutationEventData event : firstEvents) {
        accumulator.accumulate(event);
    }
    ConsumerPatch patchBeforeFailure = accumulator.asPatch();

    // The second diff re-adds "uri:added-Q1" and re-deletes "uri:deleted-Q1" under entity Q2.
    List<MutationEventData> rejectedEvents = generator.diffEvent(
            metaGenerator("Q2"),
            "Q2",
            1,
            Instant.EPOCH,
            asList(stmt("uri:added-Q2"), stmt("uri:added-Q1")),
            singletonList(stmt("uri:deleted-Q1")),
            asList(stmt("uri:linked-shared"), stmt("uri:")),
            singletonList(stmt("uri:unlinked-shared")));
    assertThatThrownBy(() -> {
        for (MutationEventData event : rejectedEvents) {
            accumulator.accumulate(event);
        }
    }).isInstanceOf(IllegalArgumentException.class);

    ConsumerPatch patchAfterFailure = accumulator.asPatch();
    assertThat(patchAfterFailure).isEqualTo(patchBeforeFailure);
}
Also used : MutationEventDataGenerator(org.wikidata.query.rdf.updater.MutationEventDataGenerator) MutationEventData(org.wikidata.query.rdf.updater.MutationEventData) ConsumerPatch(org.wikidata.query.rdf.tool.rdf.ConsumerPatch) Test(org.junit.Test)

Example 5 with ConsumerPatch

use of org.wikidata.query.rdf.tool.rdf.ConsumerPatch in project wikidata-query-rdf by wikimedia.

the class UpdatePatchAccumulatorUnitTest method test_reconcile_operation_can_be_accumulated.

/**
 * Accumulates a diff for Q1, a diff for Q2 and then a reconciliation for Q1,
 * and checks the added, removed, linked/unlinked shared statements and the
 * reconciliations exposed by the resulting patch.
 */
@Test
public void test_reconcile_operation_can_be_accumulated() {
    MutationEventDataGenerator eventGenerator = new MutationEventDataGenerator(
            serializer,
            RDFFormat.TURTLE.getDefaultMIMEType(),
            Integer.MAX_VALUE);
    PatchAccumulator accumulator = new PatchAccumulator(deserializer);

    // Each diff deliberately lists its added and removed statements twice.
    accumulateDiff(
            accumulator,
            "Q1",
            asList(stmt("uri:added-1"), stmt("uri:added-1")),
            asList(stmt("uri:removed-1"), stmt("uri:removed-1")),
            asList(stmt("uri:linked-shared"), stmt("uri:linked-shared-1")),
            asList(stmt("uri:unlinked-shared"), stmt("uri:unlinked-shared-1")),
            eventGenerator);
    accumulateDiff(
            accumulator,
            "Q2",
            asList(stmt("uri:added-2"), stmt("uri:added-2")),
            asList(stmt("uri:removed-2"), stmt("uri:removed-2")),
            asList(stmt("uri:linked-shared"), stmt("uri:linked-shared-2")),
            asList(stmt("uri:unlinked-shared"), stmt("uri:unlinked-shared-2")),
            eventGenerator);
    accumulateReconciliation(accumulator, "Q1", singletonList(stmt("uri:reconciled-1")));

    ConsumerPatch patch = accumulator.asPatch();

    // Only the Q2 statements are expected in the added/removed sets.
    assertThat(patch.getAdded()).containsExactlyInAnyOrder(stmt("uri:added-2"));
    assertThat(patch.getRemoved()).containsExactlyInAnyOrder(stmt("uri:removed-2"));
    assertThat(patch.getLinkedSharedElements())
            .containsExactlyInAnyOrder(stmt("uri:linked-shared"), stmt("uri:linked-shared-2"));
    assertThat(patch.getUnlinkedSharedElements())
            .contains(stmt("uri:unlinked-shared"), stmt("uri:unlinked-shared-2"));
    // Q1 is expected only as a reconciliation entry carrying the reconciled statement.
    assertThat(patch.getReconciliations())
            .containsOnlyKeys("Q1")
            .containsValue(singletonList(stmt("uri:reconciled-1")));
}
Also used : MutationEventDataGenerator(org.wikidata.query.rdf.updater.MutationEventDataGenerator) ConsumerPatch(org.wikidata.query.rdf.tool.rdf.ConsumerPatch) Test(org.junit.Test)

Aggregations

Test (org.junit.Test)11 ConsumerPatch (org.wikidata.query.rdf.tool.rdf.ConsumerPatch)11 MetricRegistry (com.codahale.metrics.MetricRegistry)4 MutationEventDataGenerator (org.wikidata.query.rdf.updater.MutationEventDataGenerator)4 Instant (java.time.Instant)3 ArrayList (java.util.ArrayList)3 ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord)3 Statement (org.openrdf.model.Statement)3 Duration (java.time.Duration)2 Collection (java.util.Collection)2 HashSet (java.util.HashSet)2 List (java.util.List)2 Map (java.util.Map)2 TopicPartition (org.apache.kafka.common.TopicPartition)2 Assertions.assertThat (org.assertj.core.api.Assertions.assertThat)2 RunWith (org.junit.runner.RunWith)2 Matchers.any (org.mockito.Matchers.any)2 Mock (org.mockito.Mock)2 Mockito.times (org.mockito.Mockito.times)2 Mockito.verify (org.mockito.Mockito.verify)2