Search in sources :

Example 1 with RdfRepositoryUpdater

use of org.wikidata.query.rdf.tool.rdf.RdfRepositoryUpdater in project wikidata-query-rdf by wikimedia.

the class StreamingUpdate method build.

/**
 * Wires together a {@link StreamingUpdaterConsumer} from the supplied options.
 *
 * <p>A Kafka stream consumer is built first, then an HTTP-backed {@link RdfClient}
 * which is wrapped into the {@link RdfRepositoryUpdater} handed to the result.
 *
 * @param options settings read for the Kafka consumer, the SPARQL URI and the
 *                inconsistencies warning threshold
 * @param metrics registry passed to the Kafka metrics listener and to the returned consumer
 * @return the assembled consumer
 */
static StreamingUpdaterConsumer build(StreamingUpdateOptions options, MetricRegistry metrics) {
    // Input side: deserializer + Kafka consumer.
    RDFParserSuppliers parserSuppliers = new RDFParserSuppliers(RDFParserRegistry.getInstance());
    RDFChunkDeserializer chunkDeserializer = new RDFChunkDeserializer(parserSuppliers);
    KafkaStreamConsumer kafkaConsumer = KafkaStreamConsumer.build(
            options.brokers(),
            options.topic(),
            options.partition(),
            options.consumerGroup(),
            options.batchSize(),
            chunkDeserializer,
            parseInitialOffset(options),
            KafkaStreamConsumerMetricsListener.forRegistry(metrics),
            options.bufferedInputMessages(),
            buildFilter(StreamingUpdateOptions.entityFilterPattern(options)));

    // Output side: HTTP client, retry policy and timeout feeding the RDF client.
    HttpClient http = buildHttpClient(getHttpProxyHost(), getHttpProxyPort());
    Retryer<ContentResponse> httpRetryer = buildHttpClientRetryer();
    Duration clientTimeout = RdfRepositoryUpdater.getRdfClientTimeout();
    RdfClient client = new RdfClient(http, StreamingUpdateOptions.sparqlUri(options), httpRetryer, clientTimeout);

    UrisScheme uriScheme = UrisSchemeFactory.getURISystem();
    RdfRepositoryUpdater repositoryUpdater = new RdfRepositoryUpdater(client, uriScheme);
    return new StreamingUpdaterConsumer(
            kafkaConsumer,
            repositoryUpdater,
            metrics,
            options.inconsistenciesWarningThreshold());
}
Also used : RDFParserSuppliers(org.wikidata.query.rdf.tool.rdf.RDFParserSuppliers) ContentResponse(org.eclipse.jetty.client.api.ContentResponse) UrisScheme(org.wikidata.query.rdf.common.uri.UrisScheme) HttpClient(org.eclipse.jetty.client.HttpClient) HttpClientUtils.buildHttpClient(org.wikidata.query.rdf.tool.HttpClientUtils.buildHttpClient) RdfRepositoryUpdater(org.wikidata.query.rdf.tool.rdf.RdfRepositoryUpdater) Duration(java.time.Duration) RdfClient(org.wikidata.query.rdf.tool.rdf.client.RdfClient) RDFChunkDeserializer(org.wikidata.query.rdf.updater.RDFChunkDeserializer)

Example 2 with RdfRepositoryUpdater

use of org.wikidata.query.rdf.tool.rdf.RdfRepositoryUpdater in project wikidata-query-rdf by wikimedia.

the class StreamingUpdaterConsumer method run.

/**
 * Polls batches from the stream consumer and applies each patch to the RDF
 * repository until the {@code stop} flag is set.
 *
 * <p>Every applied batch updates the counters and is acknowledged on the consumer;
 * a warning is logged when the result passes the inconsistency threshold. The
 * consumer and the repository are both closed when the loop ends.
 */
public void run() {
    try (StreamConsumer source = this.consumer;
        RdfRepositoryUpdater store = this.repository) {
        // NOTE(review): only the last line of the original comment is visible here:
        // as unavailable preventing offsets to be committed.
        while (!stop) {
            StreamConsumer.Batch batch = pollTimeCnt.time(() -> source.poll(TIMEOUT));
            if (batch != null) {
                RDFPatchResult outcome = rdfStoreTimeCnt.time(
                        () -> store.applyPatch(batch.getPatch(), batch.getAverageEventTime()));
                updateCounters(outcome);
                boolean tooInconsistent = passInconsistencyThreshold(outcome, inconsistencyWarningThreshold);
                if (tooInconsistent) {
                    logger.warn("Applied batch with too many inconsistencies. {} for {}.", outcome, batch);
                }
                // Acknowledge only once the patch has been applied.
                source.acknowledge();
            }
            // A null batch means nothing was polled: loop again.
        }
    }
}
Also used : RdfRepositoryUpdater(org.wikidata.query.rdf.tool.rdf.RdfRepositoryUpdater) RDFPatchResult(org.wikidata.query.rdf.tool.rdf.RDFPatchResult)

Example 3 with RdfRepositoryUpdater

use of org.wikidata.query.rdf.tool.rdf.RdfRepositoryUpdater in project wikidata-query-rdf by wikimedia.

the class StreamingUpdaterConsumerUnitTest method test.

/**
 * Runs the updater on its own thread against a mocked consumer and repository,
 * waits for five patches to be applied, stops it, and then checks that polls,
 * acknowledgements, patch applications and metric counters all line up with the
 * number of patches applied.
 */
@Test
public void test() throws InterruptedException {
    // Fixture: an empty patch and a fixed result returned for every application.
    List<String> noDeletions = new ArrayList<>();
    Map<String, Collection<Statement>> noReconciliations = new HashMap<>();
    ConsumerPatch patch = new ConsumerPatch(statements(), statements(), statements(), statements(), noDeletions, noReconciliations);
    Instant avgEventTime = Instant.EPOCH.plus(4, ChronoUnit.MINUTES);
    RDFPatchResult cannedResult = new RDFPatchResult(2, 1, 2, 1, 1, 1);
    LongAdder patchApplied = new LongAdder();
    CountDownLatch fivePatches = new CountDownLatch(5);

    // Every poll yields a fresh batch wrapping the same patch.
    Answer<StreamConsumer.Batch> pollAnswer =
            invocation -> new StreamConsumer.Batch(patch, avgEventTime, "1", Instant.now(), "2", Instant.now());
    when(consumer.poll(any())).thenAnswer(pollAnswer);

    // Every application ticks the latch and the counter before returning the canned result.
    Answer<RDFPatchResult> applyAnswer = invocation -> {
        fivePatches.countDown();
        patchApplied.increment();
        return cannedResult;
    };
    when(rdfRepositoryUpdater.applyPatch(any(), any())).thenAnswer(applyAnswer);

    MetricRegistry registry = new MetricRegistry();
    StreamingUpdaterConsumer updater = new StreamingUpdaterConsumer(consumer, rdfRepositoryUpdater, registry, 1F);
    Thread worker = new Thread(updater);
    worker.start();

    // Wait for five patches to be applied and stop the updater
    fivePatches.await();
    updater.close();
    worker.join();

    // The worker has terminated, so the count is read once and reused below.
    int applied = patchApplied.intValue();

    // Make sure that we called the methods the right number of times
    // This updater does not much other than bridging a consumer and a repository
    verify(consumer, times(applied)).poll(any());
    verify(consumer, times(applied)).acknowledge();
    verify(consumer, times(1)).close();
    verify(rdfRepositoryUpdater, times(applied)).applyPatch(same(patch), same(avgEventTime));
    verify(rdfRepositoryUpdater, times(1)).close();

    String[] counterNames = {
        "mutations",
        "delete-mutations",
        "reconciliation-mutations",
        "divergences",
        "shared-element-mutations",
        "shared-element-redundant-mutations",
    };
    for (String counterName : counterNames) {
        assertThat(registry.counter(counterName).getCount()).isEqualTo(applied);
    }
}
Also used : Statement(org.openrdf.model.Statement) LongAdder(java.util.concurrent.atomic.LongAdder) RDFPatchResult(org.wikidata.query.rdf.tool.rdf.RDFPatchResult) Matchers.same(org.mockito.Matchers.same) StreamingUpdaterConsumer.passInconsistencyThreshold(org.wikidata.query.rdf.updater.consumer.StreamingUpdaterConsumer.passInconsistencyThreshold) Mock(org.mockito.Mock) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) StatementHelper.statements(org.wikidata.query.rdf.test.StatementHelper.statements) ArrayList(java.util.ArrayList) Answer(org.mockito.stubbing.Answer) Map(java.util.Map) ConsumerPatch(org.wikidata.query.rdf.tool.rdf.ConsumerPatch) MetricRegistry(com.codahale.metrics.MetricRegistry) Collection(java.util.Collection) Mockito.times(org.mockito.Mockito.times) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) Instant(java.time.Instant) Mockito.verify(org.mockito.Mockito.verify) Matchers.any(org.mockito.Matchers.any) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) ChronoUnit(java.time.temporal.ChronoUnit) MockitoJUnitRunner(org.mockito.runners.MockitoJUnitRunner) RdfRepositoryUpdater(org.wikidata.query.rdf.tool.rdf.RdfRepositoryUpdater) HashMap(java.util.HashMap) Instant(java.time.Instant) MetricRegistry(com.codahale.metrics.MetricRegistry) ArrayList(java.util.ArrayList) CountDownLatch(java.util.concurrent.CountDownLatch) LongAdder(java.util.concurrent.atomic.LongAdder) Collection(java.util.Collection) RDFPatchResult(org.wikidata.query.rdf.tool.rdf.RDFPatchResult) ConsumerPatch(org.wikidata.query.rdf.tool.rdf.ConsumerPatch) Test(org.junit.Test)

Aggregations

RdfRepositoryUpdater (org.wikidata.query.rdf.tool.rdf.RdfRepositoryUpdater)3 RDFPatchResult (org.wikidata.query.rdf.tool.rdf.RDFPatchResult)2 MetricRegistry (com.codahale.metrics.MetricRegistry)1 Duration (java.time.Duration)1 Instant (java.time.Instant)1 ChronoUnit (java.time.temporal.ChronoUnit)1 ArrayList (java.util.ArrayList)1 Collection (java.util.Collection)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 CountDownLatch (java.util.concurrent.CountDownLatch)1 LongAdder (java.util.concurrent.atomic.LongAdder)1 Assertions.assertThat (org.assertj.core.api.Assertions.assertThat)1 HttpClient (org.eclipse.jetty.client.HttpClient)1 ContentResponse (org.eclipse.jetty.client.api.ContentResponse)1 Test (org.junit.Test)1 RunWith (org.junit.runner.RunWith)1 Matchers.any (org.mockito.Matchers.any)1 Matchers.same (org.mockito.Matchers.same)1