Search in sources :

Example 11 with Change

use of org.wikidata.query.rdf.tool.change.Change in project wikidata-query-rdf by wikimedia.

the class DeferredChangesUnitTest method deferredUpdates.

/**
 * Verifies which deferred changes are merged into a batch by
 * {@code DeferredChanges.augmentWithDeferredChanges}.
 */
@Test
public void deferredUpdates() {
    // The batch being augmented: one change for Q123 and one for Q345.
    Change batchQ123 = new Change("Q123", 100, Instant.EPOCH, 100);
    Change batchQ345 = new Change("Q345", 101, Instant.EPOCH, 101);
    List<Change> batch = ImmutableList.of(batchQ123, batchQ345);

    DeferredChanges deferred = new DeferredChanges();
    // Entity not present in the batch, deferred with 0: expected to be appended.
    deferred.add(new Change("Q567", 102, Instant.EPOCH, 102), 0);
    // Same entity id as a batch entry, deferred with 0: expected not to add a second Q123.
    deferred.add(new Change("Q123", 102, Instant.EPOCH, 102), 0);
    // Deferred with 1_000_000 (presumably a delay that has not elapsed yet): expected to be held back.
    deferred.add(new Change("Q890", 100, Instant.EPOCH, 100), 1_000_000);

    Collection<Change> augmented = deferred.augmentWithDeferredChanges(batch);

    // Result is the original Q123 and Q345 followed by Q567, in that order.
    assertThat(augmented).extracting(Change::entityId).containsExactly("Q123", "Q345", "Q567");
}
Also used : Change(org.wikidata.query.rdf.tool.change.Change) Test(org.junit.Test)

Example 12 with Change

use of org.wikidata.query.rdf.tool.change.Change in project wikidata-query-rdf by wikimedia.

the class Updater method fetchDataFromWikibaseAndMunge.

/**
 * Runs {@code handleChange} for every change of the batch on the executor and
 * gathers the changes whose task completed.
 *
 * Each task keeps retrying for as long as {@code handleChange} throws a
 * {@link RetryableException}; a {@link ContainedException} is logged and
 * rethrown, which fails that task only.
 *
 * NOTE(review): the task itself only logs {@link ContainedException}; any other
 * exception that fails a task is dropped without a log entry when its
 * {@link ExecutionException} is ignored below - confirm this is intended.
 *
 * @param trueChanges the changes to process together with the values and refs
 *        known for each entity URI
 * @return the changes whose task finished without failure, in submission order
 * @throws InterruptedException if interrupted while waiting for a task result
 */
private List<Change> fetchDataFromWikibaseAndMunge(ChangesWithValuesAndRefs trueChanges) throws InterruptedException {
    List<Future<Change>> pending = new ArrayList<>();
    for (Change change : trueChanges.changes) {
        pending.add(executor.submit(() -> {
            // Loop exits only by returning the change or by rethrowing a contained error.
            for (;;) {
                try {
                    String uri = uris.entityIdToURI(change.entityId());
                    Set<String> knownValues = trueChanges.repoValues.get(uri);
                    Set<String> knownRefs = trueChanges.repoRefs.get(uri);
                    handleChange(change, knownValues, knownRefs);
                    return change;
                } catch (RetryableException e) {
                    log.warn("Retryable error syncing.  Retrying.", e);
                } catch (ContainedException e) {
                    log.warn("Contained error syncing.  Giving up on {}", change.entityId(), e);
                    throw e;
                }
            }
        }));
    }

    List<Change> completed = new ArrayList<>(pending.size());
    for (Future<Change> task : pending) {
        try {
            completed.add(task.get());
        } catch (ExecutionException ignored) {
            // failure has already been logged; the failed change is left out of the result
        }
    }
    return completed;
}
Also used : HashSet(java.util.HashSet) EnumSet(java.util.EnumSet) Set(java.util.Set) RetryableException(org.wikidata.query.rdf.tool.exception.RetryableException) ArrayList(java.util.ArrayList) Future(java.util.concurrent.Future) Change(org.wikidata.query.rdf.tool.change.Change) ContainedException(org.wikidata.query.rdf.tool.exception.ContainedException) ExecutionException(java.util.concurrent.ExecutionException)

Example 13 with Change

use of org.wikidata.query.rdf.tool.change.Change in project wikidata-query-rdf by wikimedia.

the class Updater method getRevisionUpdates.

/**
 * Filters incoming changes by revision.
 * Changes without a usable revision (not greater than {@code Change.NO_REVISION})
 * are always kept. For the others only the highest revision per entity is kept as
 * a candidate, and a candidate is retained only if
 * {@code rdfRepository.hasRevisions} reports its entity.
 *
 * @param changes Collection of incoming changes.
 * @return The changes that need to be entered into the repository, paired with
 *         empty value and reference multimaps (value/ref fetching is disabled,
 *         see T249196).
 */
@SuppressFBWarnings(value = { "UC_USELESS_OBJECT", "WOC_WRITE_ONLY_COLLECTION_LOCAL" }, justification = "see comment about T249196")
private ChangesWithValuesAndRefs getRevisionUpdates(Iterable<Change> changes) {
    // Changes that really have to be applied
    Set<Change> needed = new HashSet<>();
    // URIs of the entities touched by those changes (only written to while T249196 is disabled)
    Set<String> changedUris = new HashSet<>();
    // Highest-revision change seen so far for each entity id
    Map<String, Change> newestByEntity = new HashMap<>();

    for (final Change incoming : changes) {
        String id = incoming.entityId();
        if (incoming.revision() <= Change.NO_REVISION) {
            // No revision to compare against: always treat as needed.
            needed.add(incoming);
            changedUris.add(uris.entityIdToURI(id));
            continue;
        }
        Change known = newestByEntity.get(id);
        if (known == null || known.revision() < incoming.revision()) {
            newestByEntity.put(id, incoming);
        }
    }

    if (!newestByEntity.isEmpty()) {
        for (String entityUri : rdfRepository.hasRevisions(newestByEntity.values())) {
            changedUris.add(entityUri);
            // The candidates are keyed by entity id, so convert the returned URI back to an id.
            needed.add(newestByEntity.get(uris.entityURItoId(entityUri)));
        }
    }
    log.debug("Filtered batch contains {} changes", needed.size());

    /*
     * FIXME: either completely remove this feature or improve.
     *  disabled see T249196
     *            ImmutableSetMultimap<String, String> values = rdfRepository.getValues(changeIds);
     *            ImmutableSetMultimap<String, String> refs = rdfRepository.getRefs(changeIds);
     */
    ImmutableSetMultimap<String, String> values = ImmutableSetMultimap.of();
    ImmutableSetMultimap<String, String> refs = ImmutableSetMultimap.of();
    if (!needed.isEmpty() && log.isDebugEnabled()) {
        log.debug("Fetched {} values", values.size());
        log.debug("Fetched {} refs", refs.size());
    }
    return new ChangesWithValuesAndRefs(needed, values, refs);
}
Also used : HashMap(java.util.HashMap) Change(org.wikidata.query.rdf.tool.change.Change) HashSet(java.util.HashSet) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings)

Example 14 with Change

use of org.wikidata.query.rdf.tool.change.Change in project wikidata-query-rdf by wikimedia.

the class RdfRepository method hasRevisions.

/**
 * Asks the repository which of the candidate changes really need to be applied.
 * Every candidate contributes one {@code ( <entityURI> revision )} row; the rows
 * are bound to the {@code values} placeholder of the {@code getRevisions} query
 * template and the query is run through {@code rdfClient}.
 *
 * @param candidates List of candidate changes
 * @return The {@code "s"} column of the query result as collected by
 *         {@code resultToSet}; the original documentation describes these as
 *         the entities for which the update is needed.
 */
public Set<String> hasRevisions(Collection<Change> candidates) {
    StringBuilder valueRows = new StringBuilder();
    for (Change candidate : candidates) {
        valueRows.append("( <")
            .append(uris.entityIdToURI(candidate.entityId()))
            .append("> ")
            .append(candidate.revision())
            .append(" )\n");
    }

    UpdateBuilder query = new UpdateBuilder(getRevisions);
    query.bind("values", valueRows.toString());
    query.bindUri("schema:version", SchemaDotOrg.VERSION);
    return resultToSet(rdfClient.query(query.toString()), "s");
}
Also used : Change(org.wikidata.query.rdf.tool.change.Change)

Aggregations

Change (org.wikidata.query.rdf.tool.change.Change)14 Test (org.junit.Test)7 Statement (org.openrdf.model.Statement)5 ArrayList (java.util.ArrayList)3 HashSet (java.util.HashSet)3 ExecutionException (java.util.concurrent.ExecutionException)2 Future (java.util.concurrent.Future)2 ContainedException (org.wikidata.query.rdf.tool.exception.ContainedException)2 RetryableException (org.wikidata.query.rdf.tool.exception.RetryableException)2 MetricRegistry (com.codahale.metrics.MetricRegistry)1 SuppressFBWarnings (edu.umd.cs.findbugs.annotations.SuppressFBWarnings)1 Instant (java.time.Instant)1 EnumSet (java.util.EnumSet)1 HashMap (java.util.HashMap)1 Set (java.util.Set)1 ExecutorService (java.util.concurrent.ExecutorService)1 LiteralImpl (org.openrdf.model.impl.LiteralImpl)1 QueryEvaluationException (org.openrdf.query.QueryEvaluationException)1 StatementBuilder (org.wikidata.query.rdf.test.StatementHelper.StatementBuilder)1 FatalException (org.wikidata.query.rdf.tool.exception.FatalException)1