Use of org.wikidata.query.rdf.updater.DiffEventData in project wikidata-query-rdf by wikimedia.
The class PatchAccumulator, method accumulateReconciliation.
private void accumulateReconciliation(List<MutationEventData> sequence) {
    checkPositionIndex(0, sequence.size(), "Received empty sequence");
    MutationEventData head = sequence.get(0);
    Optional<MutationEventData> inconsistentBlock = sequence.stream().filter(m -> {
        if (!head.getEntity().equals(m.getEntity())) {
            return true;
        } else if (!m.getMeta().requestId().equals(head.getMeta().requestId())) {
            return true;
        } else {
            return !head.getOperation().equals(m.getOperation());
        }
    }).findFirst();
    if (inconsistentBlock.isPresent()) {
        throw new IllegalArgumentException("Inconsistent sequence of events: " + inconsistentBlock.get() + " does not belong to " + head);
    }
    List<Statement> allStmts = sequence.stream()
            .map(DiffEventData.class::cast)
            .map(DiffEventData::getRdfAddedData)
            .flatMap(c -> deserChunk(c).stream())
            .collect(toList());
    reconciliations.put(head.getEntity(), allStmts);
    // Drop the patch data accumulated for this entity: reconciling it will reset all of that anyway
    removeDataFromEntity(head.getEntity());
    totalAccumulated += allStmts.size();
}
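For context, a minimal sketch of how a caller might build a reconciliation sequence and hand it to the accumulator. The entity id, revision, timestamps, and triple literals are illustrative assumptions, the accumulator variable is hypothetical, and the DiffEventData constructor arguments follow the usage shown in the tests later on this page. Every chunk shares the same entity, requestId, and operation, so the consistency filter above accepts them:

EventsMeta meta = new EventsMeta(Instant.now(), "unused", "domain", "stream", "req-1");
// Two chunks (indices 0 and 1 of 2) belonging to the same reconciliation
MutationEventData chunk0 = new DiffEventData(meta, "Q42", 1, Instant.now(), 0, 2,
        MutationEventData.RECONCILE_OPERATION,
        new RDFDataChunk("<uri:s> <uri:p> <uri:o1> .\n", RDFFormat.TURTLE.getDefaultMIMEType()),
        null, null, null);
MutationEventData chunk1 = new DiffEventData(meta, "Q42", 1, Instant.now(), 1, 2,
        MutationEventData.RECONCILE_OPERATION,
        new RDFDataChunk("<uri:s> <uri:p> <uri:o2> .\n", RDFFormat.TURTLE.getDefaultMIMEType()),
        null, null, null);
// The public accumulate(List) entry point dispatches RECONCILE_OPERATION sequences here
accumulator.accumulate(Arrays.asList(chunk0, chunk1));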
Use of org.wikidata.query.rdf.updater.DiffEventData in project wikidata-query-rdf by wikimedia.
The class PatchAccumulator, method accumulateDiff.
private void accumulateDiff(List<MutationEventData> sequence) {
    MutationEventData head = sequence.get(0);
    List<Statement> added = new ArrayList<>();
    List<Statement> removed = new ArrayList<>();
    List<Statement> linkedShared = new ArrayList<>();
    List<Statement> unlinkedShared = new ArrayList<>();
    for (MutationEventData data : sequence) {
        if (!head.getClass().equals(data.getClass())) {
            throw new IllegalArgumentException("Inconsistent chunks provided, head class " + head.getClass() + " does not match " + data.getClass());
        }
        if (!head.getMeta().requestId().equals(data.getMeta().requestId())) {
            throw new IllegalArgumentException("Inconsistent chunks provided, head requestId " + head.getMeta().requestId() + " does not match " + data.getMeta().requestId());
        }
        DiffEventData diff = (DiffEventData) data;
        if (diff.getRdfAddedData() != null) {
            added.addAll(deserChunk(diff.getRdfAddedData()));
        }
        if (diff.getRdfDeletedData() != null) {
            removed.addAll(deserChunk(diff.getRdfDeletedData()));
        }
        if (diff.getRdfLinkedSharedData() != null) {
            linkedShared.addAll(deserChunk(diff.getRdfLinkedSharedData()));
        }
        if (diff.getRdfUnlinkedSharedData() != null) {
            unlinkedShared.addAll(deserChunk(diff.getRdfUnlinkedSharedData()));
        }
    }
    Patch patch = SiteLinksReclassification.reclassify(new Patch(added, linkedShared, removed, unlinkedShared));
    accumulate(head.getEntity(), patch.getAdded(), patch.getRemoved(), patch.getLinkedSharedElements(), patch.getUnlinkedSharedElements());
}
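A hedged sketch of the input this method expects: a two-chunk diff for one revision, where the first chunk carries added triples and the second carries deleted ones (the deleted chunk goes in the constructor slot read by getRdfDeletedData above). All values and the accumulator variable are illustrative. Note also that the Patch constructor above takes (added, linkedShared, removed, unlinkedShared), which differs from the (added, removed, linked, unlinked) parameter order of the accumulate(...) overload it feeds:

EventsMeta meta = new EventsMeta(Instant.now(), "unused", "domain", "stream", "req-2");
MutationEventData addedChunk = new DiffEventData(meta, "Q42", 2, Instant.now(), 0, 2,
        MutationEventData.DIFF_OPERATION,
        new RDFDataChunk("<uri:s> <uri:p> <uri:new> .\n", RDFFormat.TURTLE.getDefaultMIMEType()),
        null, null, null);
MutationEventData deletedChunk = new DiffEventData(meta, "Q42", 2, Instant.now(), 1, 2,
        MutationEventData.DIFF_OPERATION,
        null,
        new RDFDataChunk("<uri:s> <uri:p> <uri:old> .\n", RDFFormat.TURTLE.getDefaultMIMEType()),
        null, null);
// Both chunks share a class and requestId, so the loop above accepts them
accumulator.accumulate(Arrays.asList(addedChunk, deletedChunk));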
Use of org.wikidata.query.rdf.updater.DiffEventData in project wikidata-query-rdf by wikimedia.
The class KafkaStreamConsumerMetricsListenerUnitTest, method test_metrics_are_reported.
@Test
public void test_metrics_are_reported() {
    Instant now = Instant.now();
    Clock fixedClock = Clock.fixed(now, ZoneOffset.UTC);
    Duration lagEvt1 = Duration.ofHours(2);
    Duration lagEvt2 = Duration.ofHours(1);
    Instant evTime1 = now.minus(lagEvt1);
    Instant evTime2 = now.minus(lagEvt2);
    MutationEventData msg1 = new DiffEventData(new EventsMeta(Instant.now(), "unused", "domain", "stream", "req"),
            "Q0", 1, evTime1, 0, 1, MutationEventData.IMPORT_OPERATION,
            new RDFDataChunk("\n<uri:a> <uri:a> <uri:a> .\n", RDFFormat.TURTLE.getDefaultMIMEType()),
            null, null, null);
    MutationEventData msg2 = new DiffEventData(new EventsMeta(Instant.now(), "unused", "domain", "stream", "req"),
            "Q0", 2, evTime2, 0, 1, MutationEventData.IMPORT_OPERATION,
            new RDFDataChunk("\n<uri:b> <uri:b> <uri:b> .\n", RDFFormat.TURTLE.getDefaultMIMEType()),
            null, null, null);
    TopicPartition topicPartition = new TopicPartition("topic", 0);
    when(consumer.poll(any())).thenReturn(
            new ConsumerRecords<>(singletonMap(topicPartition,
                    singletonList(new ConsumerRecord<>(topicPartition.topic(), topicPartition.partition(), 0, null, msg1)))),
            new ConsumerRecords<>(singletonMap(topicPartition,
                    singletonList(new ConsumerRecord<>(topicPartition.topic(), topicPartition.partition(), 1, null, msg2)))));
    MetricRegistry registry = new MetricRegistry();
    KafkaStreamConsumer streamConsumer = new KafkaStreamConsumer(consumer, topicPartition, chunkDeser, 1,
            new KafkaStreamConsumerMetricsListener(registry, fixedClock), m -> true);
    streamConsumer.poll(Duration.ofMillis(0));
    Gauge<Long> lag = registry.getGauges().get("kafka-stream-consumer-lag");
    Counter offered = registry.getCounters().get("kafka-stream-consumer-triples-offered");
    Counter accumulated = registry.getCounters().get("kafka-stream-consumer-triples-accumulated");
    assertThat(lag.getValue()).isZero();
    assertThat(offered.getCount()).isEqualTo(1);
    assertThat(accumulated.getCount()).isEqualTo(1);
    streamConsumer.acknowledge();
    assertThat(lag.getValue()).isEqualTo(lagEvt1.toMillis());
    streamConsumer.poll(Duration.ofMillis(0));
    assertThat(offered.getCount()).isEqualTo(2);
    assertThat(accumulated.getCount()).isEqualTo(2);
    assertThat(lag.getValue()).isEqualTo(lagEvt1.toMillis());
    streamConsumer.acknowledge();
    assertThat(lag.getValue()).isEqualTo(lagEvt2.toMillis());
}
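The lag assertions above follow a "wall clock minus event time of the last acknowledged message" pattern: lag stays at the previous value until acknowledge() advances it. A small self-contained sketch of that arithmetic, where lastAcknowledgedEventTime is a hypothetical name for the listener's internal state (not necessarily the project's field name):

Instant now = Instant.now();
Clock fixedClock = Clock.fixed(now, ZoneOffset.UTC);
// After the first acknowledge(), the last acknowledged event is msg1 (event time now - 2h)
Instant lastAcknowledgedEventTime = now.minus(Duration.ofHours(2));
long lagMillis = Duration.between(lastAcknowledgedEventTime, fixedClock.instant()).toMillis();
// lagMillis == Duration.ofHours(2).toMillis(), matching the lagEvt1 assertion;
// acknowledging msg2 (event time now - 1h) would shrink it to lagEvt2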
Use of org.wikidata.query.rdf.updater.DiffEventData in project wikidata-query-rdf by wikimedia.
The class KafkaStreamConsumerUnitTest, method test_prefer_reassembled_message.
@Test
public void test_prefer_reassembled_message() {
    int bufferedMessages = 250;
    TopicPartition topicPartition = new TopicPartition("test", 0);
    List<ConsumerRecord<String, MutationEventData>> allRecords = IntStream.range(0, bufferedMessages).mapToObj(i -> {
        EventsMeta meta = new EventsMeta(Instant.EPOCH, UUID.randomUUID().toString(), TEST_DOMAIN, TESTED_STREAM, "unused");
        MutationEventData diff = new DiffEventData(meta, "Q1", 1, Instant.EPOCH, i, bufferedMessages,
                MutationEventData.DIFF_OPERATION,
                new RDFDataChunk("<uri:a> <uri:a> <uri:" + i + "> .\n", RDFFormat.TURTLE.getDefaultMIMEType()),
                null, null, null);
        return new ConsumerRecord<String, MutationEventData>(topicPartition.topic(), topicPartition.partition(), i, null, diff);
    }).collect(toList());
    when(consumer.poll(any())).thenReturn(
            new ConsumerRecords<>(singletonMap(topicPartition, allRecords.subList(0, bufferedMessages / 2))),
            new ConsumerRecords<>(singletonMap(topicPartition, allRecords.subList(bufferedMessages / 2, allRecords.size()))),
            new ConsumerRecords<>(emptyMap()));
    KafkaStreamConsumer streamConsumer = new KafkaStreamConsumer(consumer, topicPartition, chunkDeser, 10,
            KafkaStreamConsumerMetricsListener.forRegistry(new MetricRegistry()), m -> true);
    StreamConsumer.Batch b = streamConsumer.poll(Duration.ofMillis(100));
    assertThat(b).isNotNull();
    ConsumerPatch patch = b.getPatch();
    assertThat(patch.getAdded().size()).isEqualTo(bufferedMessages);
    streamConsumer.acknowledge();
    b = streamConsumer.poll(Duration.ofMillis(100));
    assertThat(b).isNull();
}
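What makes the reassembly possible here is that each chunk declares its index i and the total chunk count (bufferedMessages) in the DiffEventData constructor, so the consumer can keep reading past its soft buffer size of 10 until the message is complete. A rough sketch of that completeness bookkeeping; the names and logic are illustrative, not the project's implementation:

int expectedChunks = 250;                        // the sequence length each chunk declares
Set<Integer> seenChunkIndices = new HashSet<>(); // chunk indices observed while polling
// ... record each received chunk's index as records arrive ...
boolean reassembled = seenChunkIndices.size() == expectedChunks;
// Only once reassembled is true can the 250 single-triple chunks be merged into one
// patch, which is why patch.getAdded() holds exactly bufferedMessages statements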
Use of org.wikidata.query.rdf.updater.DiffEventData in project wikidata-query-rdf by wikimedia.
The class PatchAccumulator, method accumulate.
public void accumulate(List<MutationEventData> sequence) {
    checkPositionIndex(0, sequence.size(), "Received empty sequence");
    MutationEventData head = sequence.get(0);
    checkArgument(canAccumulate(head), "Cannot accumulate data for entity: " + head.getEntity());
    switch (head.getOperation()) {
        case DELETE_OPERATION:
            checkArgument(sequence.size() == 1, "Inconsistent delete mutation (" + sequence.size() + " chunks)");
            accumulateDelete(head);
            break;
        case IMPORT_OPERATION:
        case DIFF_OPERATION:
            checkArgument(head instanceof DiffEventData, "Unsupported MutationEventData of type " + head.getOperation());
            accumulateDiff(sequence);
            break;
        case RECONCILE_OPERATION:
            checkArgument(head instanceof DiffEventData, "Unsupported MutationEventData of type " + head.getOperation());
            accumulateReconciliation(sequence);
            break;
        default:
            throw new UnsupportedOperationException("Unsupported operation [" + head.getOperation() + "]");
    }
}
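A caller-side sketch of the guard this entry point enforces, assuming canAccumulate is reachable from the calling code (as the checkArgument above suggests); the variable names and the fallback handling are illustrative:

if (accumulator.canAccumulate(sequence.get(0))) {
    // Dispatches to the delete / import+diff / reconcile paths shown above
    accumulator.accumulate(sequence);
} else {
    // The accumulator cannot take this entity right now; e.g. flush the current
    // patch downstream before starting a new accumulation (illustrative handling)
}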