Use of org.wikidata.query.rdf.tool.rdf.ConsumerPatch in the project wikidata-query-rdf by wikimedia.
From the class KafkaStreamConsumerUnitTest, method test_poll_accumulates_records_into_a_rdfpatch.
/**
 * Drains a {@link KafkaStreamConsumer} backed by a mocked Kafka consumer and checks the batches it yields.
 *
 * <p>The mock hands out the generated records over two polls (the first two records, then the rest)
 * followed by an empty poll. Every batch returned is applied to the fixture collections
 * ({@code storedData}, {@code unlinkedSharedElts}, {@code deletedEntityIds}) and acknowledged. The test
 * then expects exactly two batches and verifies their average event times, first/last message ids,
 * first/last event times, and the accumulated data.
 */
@Test
public void test_poll_accumulates_records_into_a_rdfpatch() {
    TopicPartition topicPartition = new TopicPartition("test", 0);
    // NOTE(review): an earlier comment here stated that the preferred batch length was 4, so that the
    // first batch ends on event n°4 and batching events 5 and 6 does not rely on timeouts; the value
    // actually passed is 8. Presumably the length is not counted in events — confirm against
    // KafkaStreamConsumer.
    KafkaStreamConsumer streamConsumer = new KafkaStreamConsumer(
            consumer,
            topicPartition,
            chunkDeser,
            8,
            KafkaStreamConsumerMetricsListener.forRegistry(new MetricRegistry()),
            m -> true);
    Instant evtTimeBase = Instant.EPOCH;
    Duration evtTimeIncrement = Duration.ofMinutes(2);
    List<ConsumerRecord<String, MutationEventData>> allRecords = recordsList(evtTimeBase, evtTimeIncrement);

    // Stubbed poll results, in the order the mock hands them out.
    ConsumerRecords<String, MutationEventData> firstPoll =
            new ConsumerRecords<>(singletonMap(topicPartition, allRecords.subList(0, 2)));
    ConsumerRecords<String, MutationEventData> secondPoll =
            new ConsumerRecords<>(singletonMap(topicPartition, allRecords.subList(2, allRecords.size())));
    ConsumerRecords<String, MutationEventData> noMoreRecords = new ConsumerRecords<>(emptyMap());
    when(consumer.poll(any())).thenReturn(firstPoll, secondPoll, noMoreRecords);

    List<Instant> eventTimes = new ArrayList<>();
    List<StreamConsumer.Batch> batches = new ArrayList<>();
    Duration pollTimeout = Duration.ofMillis(1000);
    // A null batch ends the loop.
    for (StreamConsumer.Batch batch = streamConsumer.poll(pollTimeout);
            batch != null;
            batch = streamConsumer.poll(pollTimeout)) {
        batches.add(batch);
        ConsumerPatch batchPatch = batch.getPatch();
        eventTimes.add(batch.getAverageEventTime());
        // Apply the patch to the fixture state: add, then remove, then add the linked shared elements.
        storedData.addAll(batchPatch.getAdded());
        storedData.removeAll(batchPatch.getRemoved());
        storedData.addAll(batchPatch.getLinkedSharedElements());
        unlinkedSharedElts.addAll(batchPatch.getUnlinkedSharedElements());
        deletedEntityIds.addAll(batchPatch.getEntityIdsToDelete());
        streamConsumer.acknowledge();
    }
    streamConsumer.close();

    // Expected averages: events 1..4 for the first batch, events 5..6 for the second (2 minutes apart).
    Instant firstBatchAverage = Instant.EPOCH.plus(Duration.ofMinutes((2 + 2 * 2 + 2 * 3 + 2 * 4) / 4));
    Instant secondBatchAverage = Instant.EPOCH.plus(Duration.ofMinutes((2 * 5 + 2 * 6) / 2));
    assertThat(eventTimes).containsExactly(firstBatchAverage, secondBatchAverage);

    assertThat(storedData).containsExactlyInAnyOrderElementsOf(expectedData);

    // Either one or two unlinked shared elements are accepted, each with its own expected content.
    assertThat(unlinkedSharedElts.size()).isBetween(1, 2);
    if (unlinkedSharedElts.size() != 1) {
        assertThat(unlinkedSharedElts).containsExactlyInAnyOrderElementsOf(expectedUnlinkedEltsSingleOptimizedBatch);
    } else {
        assertThat(unlinkedSharedElts).containsExactlyInAnyOrderElementsOf(expectedUnlinkedElts);
    }

    assertThat(deletedEntityIds).containsOnly("Q2");

    // Batch boundaries: message ids and event times of the first and last event of each batch.
    assertThat(batches.stream().map(StreamConsumer.Batch::getBatchStartMsgId))
            .containsExactly("IMP-Q1-1", "IMP-Q2-1");
    assertThat(batches.stream().map(StreamConsumer.Batch::getBatchEndMsgId))
            .containsExactly("DIF-Q1-4", "DEL-Q2-2");
    assertThat(batches.stream().map(StreamConsumer.Batch::getBatchStartDt))
            .containsExactly(
                    evtTimeBase.plus(evtTimeIncrement.multipliedBy(1)),
                    evtTimeBase.plus(evtTimeIncrement.multipliedBy(5)));
    assertThat(batches.stream().map(StreamConsumer.Batch::getBatchEndDt))
            .containsExactly(
                    evtTimeBase.plus(evtTimeIncrement.multipliedBy(4)),
                    evtTimeBase.plus(evtTimeIncrement.multipliedBy(6)));
}
Use of org.wikidata.query.rdf.tool.rdf.ConsumerPatch in the project wikidata-query-rdf by wikimedia.
From the class KafkaStreamConsumerUnitTest, method test_poll_accumulates_records_with_delete_into_a_rdfpatch.
/**
 * Drains a {@link KafkaStreamConsumer} whose mocked Kafka consumer delivers the records built by
 * {@code recordListWithDeleteAndAdd()} in a single poll, followed by an empty poll.
 *
 * <p>Every returned batch is applied to local sets and acknowledged. The test then expects the stored
 * data to be exactly the statements for {@code Q1-added-0} and {@code Q1-shared}, and {@code Q1} to be
 * the only entity id marked for deletion.
 */
@Test
public void test_poll_accumulates_records_with_delete_into_a_rdfpatch() {
    TopicPartition topicPartition = new TopicPartition("test", 0);
    KafkaStreamConsumer streamConsumer = new KafkaStreamConsumer(
            consumer,
            topicPartition,
            chunkDeser,
            Integer.MAX_VALUE,
            KafkaStreamConsumerMetricsListener.forRegistry(new MetricRegistry()),
            m -> true);
    List<ConsumerRecord<String, MutationEventData>> allRecords = recordListWithDeleteAndAdd();

    // All records arrive in the first poll; the second poll returns nothing.
    ConsumerRecords<String, MutationEventData> everything =
            new ConsumerRecords<>(singletonMap(topicPartition, allRecords));
    ConsumerRecords<String, MutationEventData> nothing = new ConsumerRecords<>(emptyMap());
    when(consumer.poll(any())).thenReturn(everything, nothing);

    Set<Statement> actualStoredData = new HashSet<>();
    Set<String> actualDeletedEntities = new HashSet<>();
    Set<Statement> expectedAddedData = new HashSet<>(statements(uris("Q1-added-0", "Q1-shared")));

    Duration pollTimeout = Duration.ofMillis(100);
    // A null batch ends the loop.
    for (StreamConsumer.Batch batch = streamConsumer.poll(pollTimeout);
            batch != null;
            batch = streamConsumer.poll(pollTimeout)) {
        ConsumerPatch batchPatch = batch.getPatch();
        // Apply the patch: add, then remove, then add the linked shared elements.
        actualStoredData.addAll(batchPatch.getAdded());
        actualStoredData.removeAll(batchPatch.getRemoved());
        actualStoredData.addAll(batchPatch.getLinkedSharedElements());
        actualDeletedEntities.addAll(batchPatch.getEntityIdsToDelete());
        streamConsumer.acknowledge();
    }
    streamConsumer.close();

    assertThat(actualStoredData).containsExactlyInAnyOrderElementsOf(expectedAddedData);
    assertThat(actualDeletedEntities).containsOnly("Q1");
}
Use of org.wikidata.query.rdf.tool.rdf.ConsumerPatch in the project wikidata-query-rdf by wikimedia.
From the class UpdatePatchAccumulatorUnitTest, method test_add_then_remove_must_not_prune_unrelated_shared_triples.
/**
 * Accumulates a diff for {@code Q1}, a diff for {@code Q2} and then a delete of {@code Q1}, and checks
 * the shared elements of the resulting patch.
 *
 * <p>Both diffs reference {@code uri:linked-shared} and {@code uri:unlinked-shared}; each also carries
 * one linked and one unlinked shared statement of its own. After the delete of {@code Q1}, the linked
 * shared elements must be exactly the common one plus the Q2-only one, and the unlinked shared
 * elements must at least contain the common one plus the Q2-only one.
 */
@Test
public void test_add_then_remove_must_not_prune_unrelated_shared_triples() {
    PatchAccumulator acc = new PatchAccumulator(deserializer);

    accumulateDiff(
            acc,
            "Q1",
            singletonList(stmt("uri:added-Q1")),
            singletonList(stmt("uri:deleted-Q1")),
            asList(stmt("uri:linked-shared"), stmt("uri:linked-shared-only-for-Q1")),
            asList(stmt("uri:unlinked-shared"), stmt("uri:unlinked-shared-only-for-Q1")));
    accumulateDiff(
            acc,
            "Q2",
            singletonList(stmt("uri:added-Q2")),
            singletonList(stmt("uri:deleted-Q2")),
            asList(stmt("uri:linked-shared"), stmt("uri:linked-shared-only-for-Q2")),
            asList(stmt("uri:unlinked-shared"), stmt("uri:unlinked-shared-only-for-Q2")));
    accumulateDelete(acc, "Q1");

    ConsumerPatch resultingPatch = acc.asPatch();

    assertThat(resultingPatch.getLinkedSharedElements())
            .containsExactlyInAnyOrder(stmt("uri:linked-shared"), stmt("uri:linked-shared-only-for-Q2"));
    // Only a lower bound is asserted: uri:unlinked-shared-only-for-Q1 may or may not be kept.
    assertThat(resultingPatch.getUnlinkedSharedElements())
            .contains(stmt("uri:unlinked-shared"), stmt("uri:unlinked-shared-only-for-Q2"));
}
Use of org.wikidata.query.rdf.tool.rdf.ConsumerPatch in the project wikidata-query-rdf by wikimedia.
From the class UpdatePatchAccumulatorUnitTest, method test_leak_data_from_accumulator.
/**
 * Checks that a failed accumulation leaves the accumulator's patch unchanged.
 *
 * <p>A diff for {@code Q1} is accumulated and the resulting patch is captured. Accumulating a second
 * diff for {@code Q2} is then expected to throw {@link IllegalArgumentException} (presumably because
 * it conflicts with what is already accumulated — confirm against {@code PatchAccumulator}). The patch
 * obtained after the failure must equal the one captured before it.
 */
@Test
public void test_leak_data_from_accumulator() {
    MutationEventDataGenerator generator =
            new MutationEventDataGenerator(serializer, RDFFormat.TURTLE.getDefaultMIMEType(), 300);
    PatchAccumulator acc = new PatchAccumulator(deserializer);

    List<MutationEventData> acceptedEvents = generator.diffEvent(
            metaGenerator("Q1"),
            "Q1",
            1,
            Instant.EPOCH,
            singletonList(stmt("uri:added-Q1")),
            singletonList(stmt("uri:deleted-Q1")),
            asList(stmt("uri:linked-shared"), stmt("uri:")),
            singletonList(stmt("uri:unlinked-shared")));
    for (MutationEventData accepted : acceptedEvents) {
        acc.accumulate(accepted);
    }
    ConsumerPatch patchBeforeFailure = acc.asPatch();

    List<MutationEventData> rejectedEvents = generator.diffEvent(
            metaGenerator("Q2"),
            "Q2",
            1,
            Instant.EPOCH,
            asList(stmt("uri:added-Q2"), stmt("uri:added-Q1")),
            singletonList(stmt("uri:deleted-Q1")),
            asList(stmt("uri:linked-shared"), stmt("uri:")),
            singletonList(stmt("uri:unlinked-shared")));
    assertThatThrownBy(() -> {
        for (MutationEventData rejected : rejectedEvents) {
            acc.accumulate(rejected);
        }
    }).isInstanceOf(IllegalArgumentException.class);

    // Nothing from the rejected events may show up in the patch.
    ConsumerPatch patchAfterFailure = acc.asPatch();
    assertThat(patchAfterFailure).isEqualTo(patchBeforeFailure);
}
Use of org.wikidata.query.rdf.tool.rdf.ConsumerPatch in the project wikidata-query-rdf by wikimedia.
From the class UpdatePatchAccumulatorUnitTest, method test_reconcile_operation_can_be_accumulated.
/**
 * Accumulates a diff for {@code Q1}, a diff for {@code Q2} and then a reconciliation of {@code Q1},
 * and checks the resulting patch.
 *
 * <p>The expected patch holds only the Q2 added/removed statements, the common and Q2-only linked
 * shared elements, at least the common and Q2-only unlinked shared elements, and a single
 * reconciliation entry keyed by {@code Q1} whose value is the reconciled statement.
 */
@Test
public void test_reconcile_operation_can_be_accumulated() {
    PatchAccumulator acc = new PatchAccumulator(deserializer);
    MutationEventDataGenerator singleChunkGenerator =
            new MutationEventDataGenerator(serializer, RDFFormat.TURTLE.getDefaultMIMEType(), Integer.MAX_VALUE);

    // Each diff deliberately repeats its added and removed statement.
    accumulateDiff(
            acc,
            "Q1",
            asList(stmt("uri:added-1"), stmt("uri:added-1")),
            asList(stmt("uri:removed-1"), stmt("uri:removed-1")),
            asList(stmt("uri:linked-shared"), stmt("uri:linked-shared-1")),
            asList(stmt("uri:unlinked-shared"), stmt("uri:unlinked-shared-1")),
            singleChunkGenerator);
    accumulateDiff(
            acc,
            "Q2",
            asList(stmt("uri:added-2"), stmt("uri:added-2")),
            asList(stmt("uri:removed-2"), stmt("uri:removed-2")),
            asList(stmt("uri:linked-shared"), stmt("uri:linked-shared-2")),
            asList(stmt("uri:unlinked-shared"), stmt("uri:unlinked-shared-2")),
            singleChunkGenerator);
    accumulateReconciliation(acc, "Q1", singletonList(stmt("uri:reconciled-1")));

    ConsumerPatch result = acc.asPatch();

    assertThat(result.getAdded()).containsExactlyInAnyOrder(stmt("uri:added-2"));
    assertThat(result.getRemoved()).containsExactlyInAnyOrder(stmt("uri:removed-2"));
    assertThat(result.getLinkedSharedElements())
            .containsExactlyInAnyOrder(stmt("uri:linked-shared"), stmt("uri:linked-shared-2"));
    assertThat(result.getUnlinkedSharedElements())
            .contains(stmt("uri:unlinked-shared"), stmt("uri:unlinked-shared-2"));
    assertThat(result.getReconciliations())
            .containsOnlyKeys("Q1")
            .containsValue(singletonList(stmt("uri:reconciled-1")));
}
Aggregations