Search in sources:

Example 1 with Change

use of org.wikidata.query.rdf.tool.change.Change in project wikidata-query-rdf by wikimedia.

In class Updater, method handleChanges.

/**
 * Handle the changes in a batch.
 *
 * Every change kept by {@code getRevisionUpdates} is submitted to the executor
 * and retried for as long as {@code handleChange} throws a
 * {@link RetryableException}. Changes whose task fails are left out of the
 * batch; the ones that completed are synced to the RDF repository in a single
 * call and counted on the update meter.
 *
 * @param changes candidate changes for this batch
 * @throws InterruptedException if the process is interrupted while waiting
 *             on changes to sync
 * @throws ExecutionException declared by the signature; the failure of an
 *             individual change task is caught and logged here instead of
 *             being rethrown
 */
protected void handleChanges(Iterable<Change> changes) throws InterruptedException, ExecutionException {
    Set<Change> trueChanges = getRevisionUpdates(changes);
    long start = System.currentTimeMillis();
    List<Future<Change>> futureChanges = new ArrayList<>();
    for (Change change : trueChanges) {
        futureChanges.add(executor.submit(() -> {
            // Keep retrying until the change is handled or fails for good.
            while (true) {
                try {
                    handleChange(change);
                    return change;
                } catch (RetryableException e) {
                    log.warn("Retryable error syncing.  Retrying.", e);
                } catch (ContainedException e) {
                    log.warn("Contained error syncing.  Giving up on {}", change.entityId(), e);
                    throw e;
                }
            }
        }));
    }
    List<Change> processedChanges = new ArrayList<>();
    for (Future<Change> f : futureChanges) {
        try {
            processedChanges.add(f.get());
        } catch (ExecutionException e) {
            // The task only logs ContainedException before rethrowing it. Any
            // other failure reaches this point unlogged, so report it here
            // rather than dropping the change silently.
            if (!(e.getCause() instanceof ContainedException)) {
                log.warn("Unexpected error syncing a change.  Skipping it.", e);
            }
        }
    }
    log.debug("Preparing update data took {} ms, have {} changes", System.currentTimeMillis() - start, processedChanges.size());
    rdfRepository.syncFromChanges(processedChanges, verify);
    updateMeter.mark(processedChanges.size());
}
Also used : RetryableException(org.wikidata.query.rdf.tool.exception.RetryableException) ArrayList(java.util.ArrayList) Future(java.util.concurrent.Future) Change(org.wikidata.query.rdf.tool.change.Change) ContainedException(org.wikidata.query.rdf.tool.exception.ContainedException) ExecutionException(java.util.concurrent.ExecutionException)

Example 2 with Change

use of org.wikidata.query.rdf.tool.change.Change in project wikidata-query-rdf by wikimedia.

In class RdfRepository, method syncFromChanges.

/**
 * Apply a collection of changes to the repository with one update query.
 *
 * Changes that carry no statements are skipped. When nothing usable is left,
 * no query is sent.
 *
 * @param changes changes to sync
 * @param verifyResult when true, {@code verifyStatements} is called on the
 *            collected entity IDs and inserted statements after the update
 * @return the count returned by the update query, or 0 if there was nothing
 *         to sync
 */
public int syncFromChanges(Collection<Change> changes, boolean verifyResult) {
    if (changes.isEmpty()) {
        // Nothing was handed in, so nothing to do.
        return 0;
    }
    UpdateBuilder update = new UpdateBuilder(msyncBody);
    update.bindUri("schema:about", SchemaDotOrg.ABOUT);
    update.bindUri("prov:wasDerivedFrom", Provenance.WAS_DERIVED_FROM);
    update.bind("uris.value", uris.value());
    update.bind("uris.statement", uris.statement());

    Set<String> ids = newHashSetWithExpectedSize(changes.size());
    List<Statement> toInsert = new ArrayList<>();
    List<Statement> aboutEntity = new ArrayList<>();
    List<Statement> aboutStatement = new ArrayList<>();
    Set<Statement> aboutLinks = new HashSet<>();
    Set<String> cleanupValues = new HashSet<>();
    for (final Change current : changes) {
        // A change without statements is broken (probably a failed retrieval): leave it out.
        if (current.getStatements() != null) {
            ids.add(current.entityId());
            toInsert.addAll(current.getStatements());
            classifyStatements(current.getStatements(), current.entityId(), aboutEntity, aboutStatement, aboutLinks);
            cleanupValues.addAll(current.getCleanupList());
        }
    }
    if (ids.isEmpty()) {
        // No IDs collected means retrieval failed for every change.
        log.debug("Got no valid changes, we're done");
        return 0;
    }

    update.bindUris("entityList", ids, uris.entity());
    update.bindStatements("insertStatements", toInsert);
    update.bindValues("entityStatements", aboutEntity);
    update.bindValues("statementStatements", aboutStatement);
    update.bindValues("aboutStatements", aboutLinks);
    if (cleanupValues.isEmpty()) {
        update.bind("cleanupQuery", "");
    } else {
        UpdateBuilder cleanupBuilder = new UpdateBuilder(cleanUnused);
        cleanupBuilder.bindUris("values", cleanupValues);
        update.bind("cleanupQuery", cleanupBuilder.toString());
    }

    long queryStart = System.currentTimeMillis();
    int modifiedCount = execute("update", UPDATE_COUNT_RESPONSE, update.toString());
    log.debug("Update query took {} millis and modified {} statements", System.currentTimeMillis() - queryStart, modifiedCount);
    if (!verifyResult) {
        return modifiedCount;
    }
    try {
        verifyStatements(ids, toInsert);
    } catch (QueryEvaluationException e) {
        throw new FatalException("Can't load verify results: " + e, e);
    }
    return modifiedCount;
}
Also used : FatalException(org.wikidata.query.rdf.tool.exception.FatalException) Statement(org.openrdf.model.Statement) ArrayList(java.util.ArrayList) Change(org.wikidata.query.rdf.tool.change.Change) QueryEvaluationException(org.openrdf.query.QueryEvaluationException) HashSet(java.util.HashSet)

Example 3 with Change

use of org.wikidata.query.rdf.tool.change.Change in project wikidata-query-rdf by wikimedia.

In class WikibaseRepositoryWireIntegrationTest, method rdfNewUsesRevisions.

/**
 * With a three hour revision cutoff and a change dated five minutes ago,
 * fetching RDF for the change must hit the stubbed URL that carries
 * {@code revision=1234} and yield the single statement served by the stub.
 */
@Test
public void rdfNewUsesRevisions() throws RetryableException {
    repository.setCollectConstraints(false);
    repository.setRevisionCutoff(Duration.of(3, ChronoUnit.HOURS));

    // Only a request that names the revision explicitly is answered by the stub.
    String revisionUrlPattern = "/wiki/Special:EntityData/Q2.ttl[?]flavor=dump&revision=1234";
    stubFor(get(urlMatching(revisionUrlPattern)).willReturn(aResponse().withBody("<a> <b> <c> .")));

    Instant fiveMinutesAgo = Instant.now().minus(5, ChronoUnit.MINUTES);
    Change recentChange = new Change("Q2", 1234, fiveMinutesAgo, 0);
    Collection<Statement> fetched = repository.fetchRdfForEntity(recentChange);

    assertThat(fetched, hasSize(1));
}
Also used : Statement(org.openrdf.model.Statement) Change(org.wikidata.query.rdf.tool.change.Change) Test(org.junit.Test)

Example 4 with Change

use of org.wikidata.query.rdf.tool.change.Change in project wikidata-query-rdf by wikimedia.

In class RdfRepositoryForTesting, method multiSync.

/**
 * Sync one entity by wrapping it in a single change and sending it through
 * the multi-change API.
 *
 * @return the mutation count reported by the sync, minus the result of the
 *         update that deletes all wikibase:timestamp triples afterwards
 */
private int multiSync(String entityId, Collection<org.openrdf.model.Statement> statements, Collection<String> valueList) {
    Change singleChange = new Change(entityId, -1, Instant.now(), -1);
    singleChange.setStatements(statements);
    // FIXME: we should not conflate refs&values cleanups like that in test
    singleChange.setValueCleanupList(valueList);
    singleChange.setRefCleanupList(valueList);

    Collection<Change> batch = Collections.singleton(singleChange);
    int mutations = syncFromChanges(batch, false).getMutationCount();

    // Timestamps were added later and many tests know nothing about them.
    // Deleting them here and subtracting the result is the easiest way to make
    // those tests ignore timestamps without complicating syncFromChanges.
    int timestampsRemoved = rdfClient.update("DELETE { ?x wikibase:timestamp ?y } WHERE { ?x wikibase:timestamp ?y }");
    return mutations - timestampsRemoved;
}
Also used : Change(org.wikidata.query.rdf.tool.change.Change)

Example 5 with Change

use of org.wikidata.query.rdf.tool.change.Change in project wikidata-query-rdf by wikimedia.

In class UpdaterUnitTest, method testUpdateLeftOffTime.

/**
 * Runs the updater over two batches and checks that the left-off time is
 * updated once per batch, each time to that batch's left-off instant minus
 * one second, and that both batches end up marked as done.
 */
@Test
public void testUpdateLeftOffTime() {
    // Two batches, each with its own left-off instant.
    Instant firstLeftOff = Instant.ofEpochMilli(25);
    Instant secondLeftOff = Instant.ofEpochSecond(40);
    ImmutableList<Change> firstChanges = ImmutableList.of(
            new Change("Q2", 1, Instant.ofEpochSecond(10), 2),
            new Change("Q3", 2, Instant.ofEpochMilli(20), 3));
    ImmutableList<Change> secondChanges = ImmutableList.of(
            new Change("Q2", 1, Instant.ofEpochSecond(30), 4),
            new Change("Q3", 2, Instant.ofEpochMilli(40), 5));
    TestChange batch1 = new TestChange(firstChanges, 20, firstLeftOff, false);
    TestChange batch2 = new TestChange(secondChanges, 20, secondLeftOff, true);
    TestChangeSource source = new TestChangeSource(Arrays.asList(batch1, batch2));

    // Collaborators: mocked repositories, with the sync stubbed to return fixed metrics.
    WikibaseRepository wikibaseRepository = mock(WikibaseRepository.class);
    RdfRepository rdfRepository = mock(RdfRepository.class);
    CollectedUpdateMetrics zeroMutations = CollectedUpdateMetrics.getMutationCountOnlyMetrics(0);
    when(rdfRepository.syncFromChanges(anyCollectionOf(Change.class), anyBoolean())).thenReturn(zeroMutations);
    Munger munger = Munger.builder(UrisSchemeFactory.WIKIDATA).build();
    String threadName = "Thread-" + this.getClass().getSimpleName();
    ExecutorService pool = Executors.newFixedThreadPool(2, runnable -> new Thread(runnable, threadName));
    MetricRegistry registry = new MetricRegistry();

    Updater<TestChange> updater = new Updater<>(source, wikibaseRepository, rdfRepository, munger, pool, true, 100, UrisSchemeFactory.WIKIDATA, false, registry);
    updater.run();

    verify(rdfRepository, times(2)).updateLeftOffTime(lestOffDateCaptor.capture());
    assertThat(lestOffDateCaptor.getAllValues()).containsExactly(firstLeftOff.minusSeconds(1), secondLeftOff.minusSeconds(1));
    assertThat(source.isBatchMarkedDone(batch1)).isTrue();
    assertThat(source.isBatchMarkedDone(batch2)).isTrue();
}
Also used : CollectedUpdateMetrics(org.wikidata.query.rdf.tool.rdf.CollectedUpdateMetrics) Instant(java.time.Instant) Munger(org.wikidata.query.rdf.tool.rdf.Munger) MetricRegistry(com.codahale.metrics.MetricRegistry) WikibaseRepository(org.wikidata.query.rdf.tool.wikibase.WikibaseRepository) RdfRepository(org.wikidata.query.rdf.tool.rdf.RdfRepository) Change(org.wikidata.query.rdf.tool.change.Change) ExecutorService(java.util.concurrent.ExecutorService) Test(org.junit.Test)

Aggregations

Change (org.wikidata.query.rdf.tool.change.Change)14 Test (org.junit.Test)7 Statement (org.openrdf.model.Statement)5 ArrayList (java.util.ArrayList)3 HashSet (java.util.HashSet)3 ExecutionException (java.util.concurrent.ExecutionException)2 Future (java.util.concurrent.Future)2 ContainedException (org.wikidata.query.rdf.tool.exception.ContainedException)2 RetryableException (org.wikidata.query.rdf.tool.exception.RetryableException)2 MetricRegistry (com.codahale.metrics.MetricRegistry)1 SuppressFBWarnings (edu.umd.cs.findbugs.annotations.SuppressFBWarnings)1 Instant (java.time.Instant)1 EnumSet (java.util.EnumSet)1 HashMap (java.util.HashMap)1 Set (java.util.Set)1 ExecutorService (java.util.concurrent.ExecutorService)1 LiteralImpl (org.openrdf.model.impl.LiteralImpl)1 QueryEvaluationException (org.openrdf.query.QueryEvaluationException)1 StatementBuilder (org.wikidata.query.rdf.test.StatementHelper.StatementBuilder)1 FatalException (org.wikidata.query.rdf.tool.exception.FatalException)1