Use of org.wikidata.query.rdf.tool.exception.ContainedException in project wikidata-query-rdf by wikimedia.
The class Updater, method handleChanges.
/**
* Handle the changes in a batch.
*
* @throws InterruptedException if the process is interrupted while waiting
* on changes to sync
* @throws ExecutionException if there is an error syncing any of the
* changes
*/
protected void handleChanges(Iterable<Change> changes) throws InterruptedException, ExecutionException {
    Set<Change> trueChanges = getRevisionUpdates(changes);
    long start = System.currentTimeMillis();

    List<Future<Change>> futureChanges = new ArrayList<>();
    for (Change change : trueChanges) {
        futureChanges.add(executor.submit(() -> {
            while (true) {
                try {
                    handleChange(change);
                    return change;
                } catch (RetryableException e) {
                    log.warn("Retryable error syncing. Retrying.", e);
                } catch (ContainedException e) {
                    log.warn("Contained error syncing. Giving up on {}", change.entityId(), e);
                    throw e;
                }
            }
        }));
    }

    List<Change> processedChanges = new ArrayList<>();
    for (Future<Change> f : futureChanges) {
        try {
            processedChanges.add(f.get());
        } catch (ExecutionException ignore) {
            // failure has already been logged
        }
    }

    log.debug("Preparing update data took {} ms, have {} changes", System.currentTimeMillis() - start, processedChanges.size());
    rdfRepository.syncFromChanges(processedChanges, verify);
    updateMeter.mark(processedChanges.size());
}
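The retry loop above distinguishes two failure modes: a RetryableException keeps the same change looping until it succeeds, while a ContainedException abandons only that one change so the rest of the batch can still be synced. Below is a minimal sketch of the same pattern in isolation; the types RetryUntilContainedFailure, MyRetryableException, MyContainedException and the sync helper are hypothetical placeholders, not classes from wikidata-query-rdf.

import java.util.concurrent.Callable;

final class RetryUntilContainedFailure implements Callable<String> {
    private final String entityId;

    RetryUntilContainedFailure(String entityId) {
        this.entityId = entityId;
    }

    @Override
    public String call() throws Exception {
        while (true) {
            try {
                sync(entityId);      // may throw either exception type
                return entityId;     // success ends the loop
            } catch (MyRetryableException e) {
                // transient problem: loop around and try the same change again
            } catch (MyContainedException e) {
                // problem is confined to this change: give up on it only,
                // letting the surrounding batch continue
                throw e;
            }
        }
    }

    private void sync(String id) throws MyRetryableException, MyContainedException {
        // placeholder for the real per-change work (handleChange in the snippet)
    }
}

class MyRetryableException extends Exception { }
class MyContainedException extends RuntimeException { }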
Use of org.wikidata.query.rdf.tool.exception.ContainedException in project wikidata-query-rdf by wikimedia.
The class RdfRepository, method execute.
/**
 * Execute some raw SPARQL.
 *
 * @param type name of the parameter in which to send sparql
 * @param responseHandler handler that parses the server's response
 * @param sparql the SPARQL to execute
 * @return the result of parsing the server's response with the response handler
 */
protected <T> T execute(String type, ResponseHandler<T> responseHandler, String sparql) {
    log.trace("Running SPARQL: {}", sparql);
    long startQuery = System.currentTimeMillis();
    // TODO we might want to look into Blazegraph's incremental update reporting.
    final ContentResponse response;
    try {
        response = retryer.call(() -> makeRequest(type, sparql, responseHandler.acceptHeader()).send());
        if (response.getStatus() != HttpStatus.OK_200) {
            throw new ContainedException("Non-200 response from triple store: " + response + " body=\n" + responseBodyAsString(response));
        }
        log.debug("Completed in {} ms", System.currentTimeMillis() - startQuery);
        return responseHandler.parse(response);
    } catch (ExecutionException | RetryException | IOException e) {
        throw new FatalException("Error updating triple store", e);
    }
}
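The catch of RetryException alongside ExecutionException suggests the retryer here is a guava-retrying Retryer. The following is a hedged sketch of how such a retryer might be assembled, assuming that library; the concrete wait and stop values are illustrative and are not taken from wikidata-query-rdf.

import java.io.IOException;
import java.util.concurrent.TimeUnit;

import com.github.rholder.retry.Retryer;
import com.github.rholder.retry.RetryerBuilder;
import com.github.rholder.retry.StopStrategies;
import com.github.rholder.retry.WaitStrategies;

final class RetryerFactory {
    static <T> Retryer<T> buildRetryer() {
        return RetryerBuilder.<T>newBuilder()
                // retry when the HTTP call itself fails
                .retryIfExceptionOfType(IOException.class)
                // back off exponentially, capped at 5 seconds between attempts
                .withWaitStrategy(WaitStrategies.exponentialWait(100, 5, TimeUnit.SECONDS))
                // give up after a handful of attempts; the caller then maps the
                // resulting RetryException to a FatalException as in execute() above
                .withStopStrategy(StopStrategies.stopAfterAttempt(5))
                .build();
    }
}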
Use of org.wikidata.query.rdf.tool.exception.ContainedException in project wikidata-query-rdf by wikimedia.
The class WikibaseRepository, method fetchRdfForEntity.
/**
* Fetch the RDF for some entity.
*
* @throws RetryableException thrown if there is an error communicating with
* wikibase
*/
public Collection<Statement> fetchRdfForEntity(String entityId) throws RetryableException {
    // TODO handle ?flavor=dump or whatever parameters we need
    URI uri = uris.rdf(entityId);
    long start = System.currentTimeMillis();
    log.debug("Fetching rdf from {}", uri);

    RDFParser parser = Rio.createParser(RDFFormat.TURTLE);
    StatementCollector collector = new StatementCollector();
    parser.setRDFHandler(new NormalizingRdfHandler(collector));

    HttpGet request = new HttpGet(uri);
    request.setConfig(configWithTimeout);
    try {
        try (CloseableHttpResponse response = client.execute(request)) {
            if (response.getStatusLine().getStatusCode() == 404) {
                // A deleted or nonexistent page
                return Collections.emptyList();
            }
            if (response.getStatusLine().getStatusCode() >= 300) {
                throw new ContainedException("Unexpected status code fetching RDF for " + uri + ": " + response.getStatusLine().getStatusCode());
            }
            parser.parse(new InputStreamReader(response.getEntity().getContent(), Charsets.UTF_8), uri.toString());
        }
    } catch (UnknownHostException | SocketException | SSLHandshakeException e) {
        // We want to bail on this, since it happens to be sticky for some reason
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RetryableException("Error fetching RDF for " + uri, e);
    } catch (RDFParseException | RDFHandlerException e) {
        throw new ContainedException("RDF parsing error for " + uri, e);
    }
    log.debug("Done in {} ms", System.currentTimeMillis() - start);
    return collector.getStatements();
}
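The parsing step in this method is ordinary Rio usage: create a Turtle parser, attach a StatementCollector as the RDF handler, parse the response body against the request URI as base URI, then read the collected statements. A self-contained sketch of just that step, assuming the RDF4J (org.eclipse.rdf4j) flavour of the Rio API; the snippet itself may use the older org.openrdf packages, which have the same shape.

import java.io.IOException;
import java.io.StringReader;
import java.util.Collection;

import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.eclipse.rdf4j.rio.RDFParseException;
import org.eclipse.rdf4j.rio.RDFParser;
import org.eclipse.rdf4j.rio.Rio;
import org.eclipse.rdf4j.rio.helpers.StatementCollector;

final class TurtleParsingSketch {
    static Collection<Statement> parseTurtle(String turtle, String baseUri)
            throws IOException, RDFParseException, RDFHandlerException {
        RDFParser parser = Rio.createParser(RDFFormat.TURTLE);
        StatementCollector collector = new StatementCollector();
        parser.setRDFHandler(collector);
        // Parse failures surface as RDFParseException/RDFHandlerException,
        // which fetchRdfForEntity wraps in a ContainedException.
        parser.parse(new StringReader(turtle), baseUri);
        return collector.getStatements();
    }
}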
Use of org.wikidata.query.rdf.tool.exception.ContainedException in project wikidata-query-rdf by wikimedia.
The class WikibaseRepository, method collectStatementsFromUrl.
/**
* Collect TTL statements from single URL.
* @throws RetryableException if there's a retryable error
*/
private void collectStatementsFromUrl(URI uri, StatementCollector collector, Timer timer) throws RetryableException {
    RDFParser parser = this.rdfParserSupplier.get(collector);
    HttpGet request = new HttpGet(uri);
    log.debug("Fetching rdf from {}", uri);
    try (Timer.Context timerContext = timer.time()) {
        try (CloseableHttpResponse response = client.execute(request)) {
            if (response.getStatusLine().getStatusCode() == 404) {
                throw new WikibaseEntityFetchException(uri, WikibaseEntityFetchException.Type.ENTITY_NOT_FOUND);
            }
            if (response.getStatusLine().getStatusCode() == 204) {
                throw new WikibaseEntityFetchException(uri, WikibaseEntityFetchException.Type.NO_CONTENT);
            }
            if (response.getStatusLine().getStatusCode() >= 300) {
                throw new WikibaseEntityFetchException(uri, WikibaseEntityFetchException.Type.UNEXPECTED_RESPONSE);
            }
            try (InputStream in = streamDumper.wrap(getInputStream(response))) {
                if (in == null) {
                    throw new WikibaseEntityFetchException(uri, WikibaseEntityFetchException.Type.EMPTY_RESPONSE);
                }
                parser.parse(new InputStreamReader(in, UTF_8), uri.toString());
            }
        }
    } catch (UnknownHostException | SocketException | SSLHandshakeException e) {
        // We want to bail on this, since it happens to be sticky for some reason
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RetryableException("Error fetching RDF for " + uri, e);
    } catch (RDFParseException | RDFHandlerException e) {
        throw new ContainedException("RDF parsing error for " + uri, e);
    }
}
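The status-code handling here is a simple classification: 404, 204, and any other non-2xx status each map to a distinct failure type, and only a usable body is parsed. A minimal sketch of that mapping, with a hypothetical FetchFailure enum standing in for WikibaseEntityFetchException.Type.

import java.util.Optional;

final class StatusMappingSketch {
    enum FetchFailure { ENTITY_NOT_FOUND, NO_CONTENT, UNEXPECTED_RESPONSE }

    // Returns the failure category for an HTTP status code, or empty when the
    // response body should be parsed normally.
    static Optional<FetchFailure> classify(int statusCode) {
        if (statusCode == 404) {
            return Optional.of(FetchFailure.ENTITY_NOT_FOUND);
        }
        if (statusCode == 204) {
            return Optional.of(FetchFailure.NO_CONTENT);
        }
        if (statusCode >= 300) {
            return Optional.of(FetchFailure.UNEXPECTED_RESPONSE);
        }
        return Optional.empty();
    }
}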
Use of org.wikidata.query.rdf.tool.exception.ContainedException in project wikidata-query-rdf by wikimedia.
The class WikibaseRepository, method fetchRdfForEntity (overload taking a revision).
/**
 * Fetch the RDF for some entity.
 * If the revision is valid (above 0), the fetch is done by revision.
 * @throws RetryableException thrown if there is an error communicating with
 *         wikibase
 */
// stop() and close() are the same
@SuppressWarnings("resource")
public Collection<Statement> fetchRdfForEntity(String entityId, long revision) throws RetryableException {
    Timer.Context timerContext = rdfFetchTimer.time();
    StatementCollector collector = new StatementCollector();
    collectStatementsFromUrl(uris.rdf(entityId, revision), collector, entityFetchTimer);
    if (collectConstraints) {
        try {
            // TODO: constraints should probably be handled by their own update pipeline
            // and possibly be stored in a dedicated graph.
            // Re-using the same error detection patterns seems suspicious.
            collectStatementsFromUrl(uris.constraints(entityId), collector, constraintFetchTimer);
        } catch (ContainedException ex) {
            // TODO: add RetryableException here?
            // Skip loading constraints on failure; it's not a reason to give up
            // on the whole item.
            log.info("Failed to load constraints: {}", ex.getMessage());
        }
    }
    log.debug("Done in {} ms", timerContext.stop() / 1000_000);
    return collector.getStatements();
}
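Across all of these snippets, three exception roles recur: RetryableException for transient problems worth another attempt, ContainedException for failures scoped to a single entity or request, and FatalException for errors that should abort the run. The sketch below shows a hierarchy consistent with how the snippets throw and catch these classes (ContainedException and FatalException are thrown without being declared, so they are presumably unchecked; RetryableException is declared and caught, so presumably checked). It is an assumption for illustration only; the real classes live in org.wikidata.query.rdf.tool.exception.

/** Transient failure: the caller should retry the same operation. */
class RetryableException extends Exception {
    RetryableException(String message, Throwable cause) {
        super(message, cause);
    }
}

/** Failure scoped to one change or request: skip it, keep the batch going. */
class ContainedException extends RuntimeException {
    ContainedException(String message) {
        super(message);
    }

    ContainedException(String message, Throwable cause) {
        super(message, cause);
    }
}

/** Unrecoverable failure: abort the update run. */
class FatalException extends RuntimeException {
    FatalException(String message, Throwable cause) {
        super(message, cause);
    }
}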