Use of org.wikidata.query.rdf.tool.exception.ContainedException in project wikidata-query-rdf by wikimedia.
Class EntityMungingRdfHandler, method munge.
/**
 * Munge an entity's worth of RDF and then sync it to the output.
 *
 * @throws RDFHandlerException if there is an error syncing it
 */
private void munge() throws RDFHandlerException {
    try {
        log.debug("Munging {}", entityId);
        munger.munge(entityId, statements);
        for (Statement statement : statements) {
            output.handleStatement(statement);
        }
        entitiesMeter.mark();
        if (entitiesMeter.getCount() % 10000 == 0) {
            log.info("Processed {} entities at ({}, {}, {})", entitiesMeter.getCount(),
                    (long) entitiesMeter.getOneMinuteRate(),
                    (long) entitiesMeter.getFiveMinuteRate(),
                    (long) entitiesMeter.getFifteenMinuteRate());
        }
        entityMetricConsumer.entitiesProcessed(entitiesMeter.getCount());
    } catch (ContainedException e) {
        log.warn("Error munging {}", entityId, e);
    }
    statements.clear();
    haveNonEntityDataStatements = false;
}
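The catch block above is the point of ContainedException: the failure is scoped to a single entity, statements.clear() runs either way, and the dump run keeps going. Below is a minimal, self-contained sketch of that catch-and-continue pattern; the local ContainedException stand-in and the entity IDs are illustrative, not taken from the project.

import java.util.ArrayList;
import java.util.List;

public class ContainedFailureDemo {
    /** Illustrative stand-in for org.wikidata.query.rdf.tool.exception.ContainedException. */
    static class ContainedException extends RuntimeException {
        ContainedException(String message) {
            super(message);
        }
    }

    public static void main(String[] args) {
        List<String> processed = new ArrayList<>();
        for (String entityId : List.of("Q1", "Q_BROKEN", "Q3")) {
            try {
                if (entityId.contains("BROKEN")) {
                    // A contained failure is scoped to this one entity.
                    throw new ContainedException("Error munging " + entityId);
                }
                processed.add(entityId);
            } catch (ContainedException e) {
                // Log and move on; the rest of the run still completes.
                System.err.println("Skipping entity: " + e.getMessage());
            }
        }
        System.out.println(processed); // [Q1, Q3]
    }
}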
Use of org.wikidata.query.rdf.tool.exception.ContainedException in project wikidata-query-rdf by wikimedia.
Class RdfClient, method execute.
/**
 * Execute some raw SPARQL.
 *
 * @param type name of the parameter in which to send sparql
 * @param responseHandler parses the response from the server
 * @param sparql the raw SPARQL to execute
 * @param <T> the type into which the result is parsed
 * @return parsed results from the server
 */
private <T> T execute(String type, ResponseHandler<T> responseHandler, String sparql) {
    log.trace("Running SPARQL: [{}] {}", sparql.length(), sparql);
    long startQuery = System.currentTimeMillis();
    // TODO we might want to look into Blazegraph's incremental update reporting.
    final ContentResponse response;
    try {
        response = retryer.call(() -> makeRequest(type, sparql, responseHandler.acceptHeader()).send());
        if (response.getStatus() != OK_200) {
            throw new ContainedException("Non-200 response from triple store: " + response
                    + " body=\n" + response.getContentAsString());
        }
        log.debug("Completed in {} ms", System.currentTimeMillis() - startQuery);
        return responseHandler.parse(response);
    } catch (ExecutionException | RetryException | IOException e) {
        throw new FatalException("Error accessing triple store", e);
    }
}
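execute splits failures into two kinds: the store answered but this query failed (a non-200 status, thrown as ContainedException), versus the store could not be reached at all (wrapped in FatalException). Below is a self-contained sketch of that split; both exception classes are re-declared locally as stand-ins for the real ones, and the request shape is simplified to a status code.

import java.io.IOException;
import java.util.concurrent.Callable;

public class ContainedVsFatalDemo {
    /** Local stand-ins; the real classes live in org.wikidata.query.rdf.tool.exception. */
    static class ContainedException extends RuntimeException {
        ContainedException(String message) { super(message); }
    }

    static class FatalException extends RuntimeException {
        FatalException(String message, Throwable cause) { super(message, cause); }
    }

    /** Mirrors the shape of execute(): per-response errors are contained, transport errors are fatal. */
    static String execute(Callable<Integer> request) {
        try {
            int status = request.call();
            if (status != 200) {
                // The store answered, but this particular query failed: contained.
                throw new ContainedException("Non-200 response from triple store: " + status);
            }
            return "ok";
        } catch (ContainedException e) {
            throw e; // don't re-wrap our own contained failure
        } catch (Exception e) {
            // Could not talk to the store at all: fatal.
            throw new FatalException("Error accessing triple store", e);
        }
    }

    public static void main(String[] args) {
        System.out.println(execute(() -> 200)); // ok
        try {
            execute(() -> 500);
        } catch (ContainedException e) {
            System.out.println("contained: " + e.getMessage());
        }
        try {
            execute(() -> { throw new IOException("connection refused"); });
        } catch (FatalException e) {
            System.out.println("fatal: " + e.getMessage());
        }
    }
}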
Use of org.wikidata.query.rdf.tool.exception.ContainedException in project wikidata-query-rdf by wikimedia.
Class Updater, method fetchDataFromWikibaseAndMunge.
private List<Change> fetchDataFromWikibaseAndMunge(ChangesWithValuesAndRefs trueChanges) throws InterruptedException {
    List<Future<Change>> futureChanges = new ArrayList<>();
    for (Change change : trueChanges.changes) {
        futureChanges.add(executor.submit(() -> {
            while (true) {
                try {
                    String entityURI = uris.entityIdToURI(change.entityId());
                    Set<String> existingValues = trueChanges.repoValues.get(entityURI);
                    Set<String> existingRefs = trueChanges.repoRefs.get(entityURI);
                    handleChange(change, existingValues, existingRefs);
                    return change;
                } catch (RetryableException e) {
                    log.warn("Retryable error syncing. Retrying.", e);
                } catch (ContainedException e) {
                    log.warn("Contained error syncing. Giving up on {}", change.entityId(), e);
                    throw e;
                }
            }
        }));
    }
    List<Change> processedChanges = new ArrayList<>(futureChanges.size());
    for (Future<Change> f : futureChanges) {
        try {
            processedChanges.add(f.get());
        } catch (ExecutionException ignore) {
            // failure has already been logged
        }
    }
    return processedChanges;
}
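Note how the two exception types drive the control flow here: RetryableException keeps the while (true) loop spinning, while a rethrown ContainedException fails the task, surfaces from Future.get() as an ExecutionException, and quietly drops that change from the result list. A small sketch of that Future behavior in isolation; the entity IDs and messages are illustrative.

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class FutureFailureDemo {
    public static void main(String[] args) throws InterruptedException {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        Future<String> ok = executor.submit(() -> "Q1");
        Future<String> failed = executor.submit((Callable<String>) () -> {
            // Stands in for a ContainedException rethrown inside the task.
            throw new RuntimeException("Contained error syncing Q2");
        });
        try {
            System.out.println(ok.get()); // Q1
            failed.get();
        } catch (ExecutionException ignore) {
            // Mirrors fetchDataFromWikibaseAndMunge: the task already logged
            // the failure, so the change is simply left out of the results.
        }
        executor.shutdown();
    }
}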
Use of org.wikidata.query.rdf.tool.exception.ContainedException in project wikidata-query-rdf by wikimedia.
Class Updater, method handleChange.
/**
 * Handle a change.
 * <ul>
 * <li>Check if the RDF store has the version of the page.
 * <li>Fetch the RDF from the Wikibase install.
 * <li>Add revision information to the statements if it isn't there already.
 * <li>Sync data to the triple store.
 * </ul>
 *
 * @throws RetryableException if there is a retryable error updating the RDF
 *             store
 */
private void handleChange(Change change, Set<String> repoValues, Set<String> repoRefs) throws RetryableException {
    log.debug("Processing data for {}", change);
    Collection<Statement> statements;
    try {
        statements = wikibase.fetchRdfForEntity(change);
    } catch (WikibaseEntityFetchException e) {
        if (DELETE_ENTITY_ERROR_TYPE.contains(e.getErrorType())) {
            log.debug("Cannot fetch entity (deleting entity): ", e);
            statements = new ArrayList<>();
        } else {
            throw new ContainedException("Received unrecoverable error fetching entity data for " + change.entityId(), e);
        }
    }
    if (verify) {
        Set<String> entityStmtsWithoutRank = statements.stream().collect(entityStatementsWithoutRank());
        if (!entityStmtsWithoutRank.isEmpty()) {
            log.warn("Found some statements without ranks while processing {}: {}", change.entityId(), entityStmtsWithoutRank);
        }
    }
    Set<String> valuesToClean = Collections.emptySet();
    Set<String> referencesToClean = Collections.emptySet();
    if (!statements.isEmpty()) {
        valuesToClean = RdfRepository.extractValuesToCleanup(repoValues, statements);
        referencesToClean = RdfRepository.extractReferencesToCleanup(repoRefs, statements);
        long fetchedRev = munger.munge(change.entityId(), statements);
        // If we've got no statements, we have no usable loaded data, so no point in checking.
        // Same if we just got back our own change - no point in checking against it.
        final long sourceRev = change.revision();
        if (sourceRev > 0 && fetchedRev > 0) {
            if (fetchedRev < sourceRev) {
                // Something weird happened - we've got a stale revision!
                log.warn("Stale revision on {}: change is {}, RDF is {}", change.entityId(), sourceRev, fetchedRev);
                metricsRepository.incDeferredChanges();
                deferredChanges.add(change, DEFERRAL_DELAY);
            }
            if (sourceRev < fetchedRev) {
                // We skipped some revisions, let's count that in the meter
                metricsRepository.markSkipAhead();
            }
        }
    }
    /*
     * TODO: we temporarily keep all the ref data because of the issues
     * in https://phabricator.wikimedia.org/T194325
     * see Change-ID Ia6c68a5b93e8c9a35310892904819c956ca9cd95
     * or git commit hash 2931b5af725b7ab341dd60920710619fa249d1f2
     * for more context
     */
    referencesToClean = Collections.emptySet();
    change.setRefCleanupList(referencesToClean);
    /*
     * TODO: we disable values cleanup to measure the impact on the lag
     * see: T249196
     */
    valuesToClean = Collections.emptySet();
    change.setValueCleanupList(valuesToClean);
    change.setStatements(statements);
}
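Taken together, the snippets rely on a three-way failure taxonomy: retry (RetryableException), abandon one unit of work (ContainedException), or abort the run (FatalException). Below is a sketch of what the hierarchy plausibly looks like. The exact supertypes are assumptions inferred from usage above: ContainedException and FatalException are thrown without being declared, so they must be unchecked, while RetryableException appears in a throws clause, so it is checked. In the real project each class lives in its own file under org.wikidata.query.rdf.tool.exception.

// Assumed shapes, inferred from how the snippets above use these classes.
class ContainedException extends RuntimeException {
    ContainedException(String message) { super(message); }
    ContainedException(String message, Throwable cause) { super(message, cause); }
}

class FatalException extends RuntimeException {
    FatalException(String message, Throwable cause) { super(message, cause); }
}

class RetryableException extends Exception {
    RetryableException(String message, Throwable cause) { super(message, cause); }
}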