Use of org.wikidata.query.rdf.tool.rdf.RdfRepositoryUpdater in project wikidata-query-rdf by wikimedia.
Class StreamingUpdate, method build.
/**
 * Assembles a {@link StreamingUpdaterConsumer} from the supplied options.
 *
 * <p>A Kafka-backed stream consumer is built first, then an HTTP-based {@link RdfClient}
 * pointing at the SPARQL URI taken from the options; the client is wrapped in an
 * {@link RdfRepositoryUpdater} and both halves are handed to the returned consumer.
 *
 * @param options source of the Kafka settings, the SPARQL URI and the inconsistency warning threshold
 * @param metrics registry passed to the Kafka metrics listener and to the returned consumer
 * @return the consumer bridging the Kafka stream and the RDF repository
 */
static StreamingUpdaterConsumer build(StreamingUpdateOptions options, MetricRegistry metrics) {
    // Input side: deserializer + Kafka consumer.
    RDFChunkDeserializer chunkDeserializer =
            new RDFChunkDeserializer(new RDFParserSuppliers(RDFParserRegistry.getInstance()));
    KafkaStreamConsumer kafkaConsumer = KafkaStreamConsumer.build(
            options.brokers(),
            options.topic(),
            options.partition(),
            options.consumerGroup(),
            options.batchSize(),
            chunkDeserializer,
            parseInitialOffset(options),
            KafkaStreamConsumerMetricsListener.forRegistry(metrics),
            options.bufferedInputMessages(),
            buildFilter(StreamingUpdateOptions.entityFilterPattern(options)));

    // Output side: HTTP client (proxy settings come from the getHttpProxy* helpers) + RDF client.
    HttpClient http = buildHttpClient(getHttpProxyHost(), getHttpProxyPort());
    Retryer<ContentResponse> httpRetryer = buildHttpClientRetryer();
    Duration clientTimeout = RdfRepositoryUpdater.getRdfClientTimeout();
    RdfClient sparqlClient =
            new RdfClient(http, StreamingUpdateOptions.sparqlUri(options), httpRetryer, clientTimeout);
    UrisScheme uriScheme = UrisSchemeFactory.getURISystem();
    RdfRepositoryUpdater repositoryUpdater = new RdfRepositoryUpdater(sparqlClient, uriScheme);

    return new StreamingUpdaterConsumer(
            kafkaConsumer, repositoryUpdater, metrics, options.inconsistenciesWarningThreshold());
}
Use of org.wikidata.query.rdf.tool.rdf.RdfRepositoryUpdater in project wikidata-query-rdf by wikimedia.
Class StreamingUpdaterConsumer, method run.
/**
 * Polls batches from the stream consumer and applies them to the RDF repository until the
 * {@code stop} flag is set.
 *
 * <p>For every non-null batch: the patch is applied, the counters are updated from the result,
 * a warning is logged when the result passes the inconsistency threshold, and the consumer is
 * acknowledged. The consumer and the repository are both closed when the loop is left.
 */
public void run() {
    try (StreamConsumer streamConsumer = this.consumer;
         RdfRepositoryUpdater rdfRepository = this.repository) {
        // NOTE(review): the comment that stood here is truncated in this view ("... as unavailable
        // preventing offsets to be committed."); presumably it explains why a dedicated stop flag
        // drives this loop — confirm against the full file.
        while (!stop) {
            StreamConsumer.Batch batch = pollTimeCnt.time(() -> streamConsumer.poll(TIMEOUT));
            if (batch != null) {
                RDFPatchResult patchResult = rdfStoreTimeCnt.time(
                        () -> rdfRepository.applyPatch(batch.getPatch(), batch.getAverageEventTime()));
                updateCounters(patchResult);
                if (passInconsistencyThreshold(patchResult, inconsistencyWarningThreshold)) {
                    logger.warn("Applied batch with too many inconsistencies. {} for {}.", patchResult, batch);
                }
                // Acknowledge only after the patch has been applied.
                streamConsumer.acknowledge();
            }
        }
    }
}
Use of org.wikidata.query.rdf.tool.rdf.RdfRepositoryUpdater in project wikidata-query-rdf by wikimedia.
Class StreamingUpdaterConsumerUnitTest, method test.
/**
 * Runs a {@link StreamingUpdaterConsumer} on its own thread against a mocked consumer and a
 * mocked repository, stops it once five patches have been applied, and checks that polling,
 * acknowledging, patch application, closing and the metric counters all line up with the
 * number of applied patches.
 */
@Test
public void test() throws InterruptedException {
    // Fixture: one patch / result pair that is served for every poll.
    List<String> deletedEntityIds = new ArrayList<String>();
    Map<String, Collection<Statement>> reconciliationMap = new HashMap<>();
    ConsumerPatch patch = new ConsumerPatch(
            statements(), statements(), statements(), statements(), deletedEntityIds, reconciliationMap);
    Instant avgEventTime = Instant.EPOCH.plus(4, ChronoUnit.MINUTES);
    RDFPatchResult cannedResult = new RDFPatchResult(2, 1, 2, 1, 1, 1);

    LongAdder appliedPatches = new LongAdder();
    CountDownLatch fivePatchesApplied = new CountDownLatch(5);

    Answer<StreamConsumer.Batch> pollAnswer = invocation ->
            new StreamConsumer.Batch(patch, avgEventTime, "1", Instant.now(), "2", Instant.now());
    Answer<RDFPatchResult> applyAnswer = invocation -> {
        fivePatchesApplied.countDown();
        appliedPatches.increment();
        return cannedResult;
    };
    when(consumer.poll(any())).thenAnswer(pollAnswer);
    when(rdfRepositoryUpdater.applyPatch(any(), any())).thenAnswer(applyAnswer);

    MetricRegistry registry = new MetricRegistry();
    StreamingUpdaterConsumer updater =
            new StreamingUpdaterConsumer(consumer, rdfRepositoryUpdater, registry, 1F);
    Thread worker = new Thread(updater);
    worker.start();

    // Let five patches go through, then stop the updater and wait for its thread to finish.
    fivePatchesApplied.await();
    updater.close();
    worker.join();

    // The worker thread has terminated, so the number of applied patches is final from here on.
    int applied = appliedPatches.intValue();

    // The updater is merely a bridge between consumer and repository:
    // every applied patch must match exactly one poll and one acknowledge.
    verify(consumer, times(applied)).poll(any());
    verify(consumer, times(applied)).acknowledge();
    verify(consumer, times(1)).close();
    verify(rdfRepositoryUpdater, times(applied)).applyPatch(same(patch), same(avgEventTime));
    verify(rdfRepositoryUpdater, times(1)).close();

    String[] counterNames = {
        "mutations",
        "delete-mutations",
        "reconciliation-mutations",
        "divergences",
        "shared-element-mutations",
        "shared-element-redundant-mutations",
    };
    for (String counterName : counterNames) {
        assertThat(registry.counter(counterName).getCount()).isEqualTo(applied);
    }
}
Aggregations