use of org.wikidata.query.rdf.tool.change.Change in project wikidata-query-rdf by wikimedia.
the class Updater method handleChanges.
/**
 * Handle the changes in a batch.
 *
 * <p>The changes returned by {@code getRevisionUpdates} are each submitted to
 * {@code executor}. A task retries for as long as {@code handleChange} throws
 * {@code RetryableException} and gives up (rethrowing) on
 * {@code ContainedException}. Changes whose task failed are left out of the
 * batch handed to {@code rdfRepository.syncFromChanges}.
 *
 * @param changes changes to process
 * @throws InterruptedException if the process is interrupted while waiting
 *             on changes to sync
 * @throws ExecutionException kept in the signature for compatibility; failures
 *             of individual change tasks are caught and logged here rather
 *             than rethrown
 */
protected void handleChanges(Iterable<Change> changes) throws InterruptedException, ExecutionException {
    Set<Change> trueChanges = getRevisionUpdates(changes);
    long start = System.currentTimeMillis();

    List<Future<Change>> futureChanges = new ArrayList<>();
    for (Change change : trueChanges) {
        futureChanges.add(executor.submit(() -> {
            while (true) {
                try {
                    handleChange(change);
                    return change;
                } catch (RetryableException e) {
                    log.warn("Retryable error syncing. Retrying.", e);
                } catch (ContainedException e) {
                    log.warn("Contained error syncing. Giving up on {}", change.entityId(), e);
                    throw e;
                }
            }
        }));
    }

    List<Change> processedChanges = new ArrayList<>();
    for (Future<Change> f : futureChanges) {
        try {
            processedChanges.add(f.get());
        } catch (ExecutionException e) {
            // Only ContainedException is logged inside the task itself. Anything
            // else would otherwise disappear without a trace, so log it here
            // before dropping the change from this batch.
            if (!(e.getCause() instanceof ContainedException)) {
                log.warn("Unexpected error syncing a change. Skipping it.", e);
            }
        }
    }

    log.debug("Preparing update data took {} ms, have {} changes", System.currentTimeMillis() - start, processedChanges.size());
    rdfRepository.syncFromChanges(processedChanges, verify);
    updateMeter.mark(processedChanges.size());
}
use of org.wikidata.query.rdf.tool.change.Change in project wikidata-query-rdf by wikimedia.
the class RdfRepository method syncFromChanges.
/**
 * Sync the repository from a list of changes using a single update query.
 *
 * <p>Changes whose statements are {@code null} are skipped. If no change
 * contributes an entity ID, no query is executed.
 *
 * @param changes List of changes.
 * @param verifyResult when {@code true}, {@code verifyStatements} is run on
 *            the synced entity IDs and inserted statements after the update
 * @return Number of triples modified, or 0 when there was nothing to sync.
 */
public int syncFromChanges(Collection<Change> changes, boolean verifyResult) {
    if (changes.isEmpty()) {
        // Nothing to do for an empty batch.
        return 0;
    }

    UpdateBuilder update = new UpdateBuilder(msyncBody);
    update.bindUri("schema:about", SchemaDotOrg.ABOUT);
    update.bindUri("prov:wasDerivedFrom", Provenance.WAS_DERIVED_FROM);
    update.bind("uris.value", uris.value());
    update.bind("uris.statement", uris.statement());

    Set<String> entityIds = newHashSetWithExpectedSize(changes.size());
    List<Statement> toInsert = new ArrayList<>();
    List<Statement> aboutEntity = new ArrayList<>();
    List<Statement> aboutStatement = new ArrayList<>();
    Set<Statement> aboutStatements = new HashSet<>();
    Set<String> valuesToClean = new HashSet<>();

    for (final Change change : changes) {
        // A change without statements is broken (probably a failed retrieval): leave it out.
        if (change.getStatements() != null) {
            entityIds.add(change.entityId());
            toInsert.addAll(change.getStatements());
            classifyStatements(change.getStatements(), change.entityId(), aboutEntity, aboutStatement, aboutStatements);
            valuesToClean.addAll(change.getCleanupList());
        }
    }

    if (entityIds.isEmpty()) {
        // No IDs at all means every change retrieval failed.
        log.debug("Got no valid changes, we're done");
        return 0;
    }

    update.bindUris("entityList", entityIds, uris.entity());
    update.bindStatements("insertStatements", toInsert);
    update.bindValues("entityStatements", aboutEntity);
    update.bindValues("statementStatements", aboutStatement);
    update.bindValues("aboutStatements", aboutStatements);

    // The cleanup sub-query is only built when there is something to clean up.
    String cleanupQuery = "";
    if (!valuesToClean.isEmpty()) {
        UpdateBuilder cleanup = new UpdateBuilder(cleanUnused);
        cleanup.bindUris("values", valuesToClean);
        cleanupQuery = cleanup.toString();
    }
    update.bind("cleanupQuery", cleanupQuery);

    long startedAt = System.currentTimeMillis();
    int modified = execute("update", UPDATE_COUNT_RESPONSE, update.toString());
    log.debug("Update query took {} millis and modified {} statements", System.currentTimeMillis() - startedAt, modified);

    if (!verifyResult) {
        return modified;
    }
    try {
        verifyStatements(entityIds, toInsert);
    } catch (QueryEvaluationException e) {
        throw new FatalException("Can't load verify results: " + e, e);
    }
    return modified;
}
use of org.wikidata.query.rdf.tool.change.Change in project wikidata-query-rdf by wikimedia.
the class WikibaseRepositoryWireIntegrationTest method rdfNewUsesRevisions.
/**
 * With a three-hour revision cutoff, fetching RDF for a five-minute-old
 * change at revision 1234 is expected to request the revision-specific
 * EntityData URL and return the single stubbed statement.
 */
@Test
public void rdfNewUsesRevisions() throws RetryableException {
    // Arrange: no constraints, three-hour cutoff, and a stub that only answers the revision URL.
    repository.setCollectConstraints(false);
    repository.setRevisionCutoff(Duration.of(3, ChronoUnit.HOURS));
    String revisionUrlPattern = "/wiki/Special:EntityData/Q2.ttl[?]flavor=dump&revision=1234";
    stubFor(get(urlMatching(revisionUrlPattern)).willReturn(aResponse().withBody("<a> <b> <c> .")));

    // Act: fetch a change that is well inside the cutoff window.
    Instant fiveMinutesAgo = Instant.now().minus(5, ChronoUnit.MINUTES);
    Change recentChange = new Change("Q2", 1234, fiveMinutesAgo, 0);
    Collection<Statement> fetched = repository.fetchRdfForEntity(recentChange);

    // Assert: exactly the one stubbed triple came back.
    assertThat(fetched, hasSize(1));
}
use of org.wikidata.query.rdf.tool.change.Change in project wikidata-query-rdf by wikimedia.
the class RdfRepositoryForTesting method multiSync.
/**
 * Run sync for single ID via multi-change API.
 *
 * @param entityId ID of the entity to sync
 * @param statements statements attached to the synthetic change
 * @param valueList list used as both the value and the reference cleanup list
 * @return mutation count reported by the sync, minus the count returned by
 *         the timestamp-removal update that follows it
 */
private int multiSync(String entityId, Collection<org.openrdf.model.Statement> statements, Collection<String> valueList) {
    Change singleChange = new Change(entityId, -1, Instant.now(), -1);
    singleChange.setStatements(statements);
    // FIXME: we should not conflate refs&values cleanups like that in test
    singleChange.setValueCleanupList(valueList);
    singleChange.setRefCleanupList(valueList);

    int syncMutations = syncFromChanges(Collections.singleton(singleChange), false).getMutationCount();

    // Many tests predate timestamps and know nothing about them. Dropping the
    // timestamps and discounting them from the result is the easiest way to
    // keep those tests unaware without complicating syncFromChanges.
    String dropTimestamps = "DELETE { ?x wikibase:timestamp ?y } WHERE { ?x wikibase:timestamp ?y }";
    int timestampMutations = rdfClient.update(dropTimestamps);

    return syncMutations - timestampMutations;
}
use of org.wikidata.query.rdf.tool.change.Change in project wikidata-query-rdf by wikimedia.
the class UpdaterUnitTest method testUpdateLeftOffTime.
/**
 * Runs the updater over two batches and checks that the left-off time is
 * updated once per batch, with each batch's left-off instant minus one
 * second, and that both batches end up marked as done.
 */
@Test
public void testUpdateLeftOffTime() {
    Instant leftOffInstant1 = Instant.ofEpochMilli(25);
    Instant leftOffInstant2 = Instant.ofEpochSecond(40);
    ImmutableList<Change> changes = ImmutableList.of(new Change("Q2", 1, Instant.ofEpochSecond(10), 2), new Change("Q3", 2, Instant.ofEpochMilli(20), 3));
    TestChange batch1 = new TestChange(changes, 20, leftOffInstant1, false);
    changes = ImmutableList.of(new Change("Q2", 1, Instant.ofEpochSecond(30), 4), new Change("Q3", 2, Instant.ofEpochMilli(40), 5));
    TestChange batch2 = new TestChange(changes, 20, leftOffInstant2, true);
    TestChangeSource source = new TestChangeSource(Arrays.asList(batch1, batch2));
    WikibaseRepository wbRepo = mock(WikibaseRepository.class);
    RdfRepository rdfRepo = mock(RdfRepository.class);
    CollectedUpdateMetrics mutationCountOnlyMetrics = CollectedUpdateMetrics.getMutationCountOnlyMetrics(0);
    when(rdfRepo.syncFromChanges(anyCollectionOf(Change.class), anyBoolean())).thenReturn(mutationCountOnlyMetrics);
    Munger munger = Munger.builder(UrisSchemeFactory.WIKIDATA).build();
    ExecutorService executorService = Executors.newFixedThreadPool(2, (r) -> new Thread(r, "Thread-" + this.getClass().getSimpleName()));
    try {
        MetricRegistry metricRegistry = new MetricRegistry();
        Updater<TestChange> updater = new Updater<>(source, wbRepo, rdfRepo, munger, executorService, true, 100, UrisSchemeFactory.WIKIDATA, false, metricRegistry);
        updater.run();
        verify(rdfRepo, times(2)).updateLeftOffTime(lestOffDateCaptor.capture());
        assertThat(lestOffDateCaptor.getAllValues()).containsExactly(leftOffInstant1.minusSeconds(1), leftOffInstant2.minusSeconds(1));
        assertThat(source.isBatchMarkedDone(batch1)).isTrue();
        assertThat(source.isBatchMarkedDone(batch2)).isTrue();
    } finally {
        // The pool is created by this test, so this test must stop it; otherwise
        // its worker threads outlive the test. Harmless if it is already shut down.
        executorService.shutdownNow();
    }
}
Aggregations