Search in sources :

Example 1 with ContainedException

use of org.wikidata.query.rdf.tool.exception.ContainedException in project wikidata-query-rdf by wikimedia.

The class Updater, method handleChanges.

/**
 * Handle the changes in a batch.
 *
 * <p>Every change returned by {@code getRevisionUpdates} is submitted to the
 * executor. A task keeps retrying its change for as long as it fails with a
 * {@link RetryableException}. A change whose task fails for any other reason
 * is logged and left out of the batch; the changes that did succeed are then
 * synced to the RDF repository in a single call.
 *
 * @param changes candidate changes to process
 * @throws InterruptedException if the process is interrupted while waiting
 *             on changes to sync
 * @throws ExecutionException kept in the signature for callers; failures of
 *             individual change tasks are logged and skipped here rather
 *             than rethrown
 */
protected void handleChanges(Iterable<Change> changes) throws InterruptedException, ExecutionException {
    Set<Change> trueChanges = getRevisionUpdates(changes);
    long start = System.currentTimeMillis();
    List<Future<Change>> futureChanges = new ArrayList<>();
    for (Change change : trueChanges) {
        futureChanges.add(executor.submit(() -> {
            // Retry until the change either succeeds or fails in a non-retryable way.
            while (true) {
                try {
                    handleChange(change);
                    return change;
                } catch (RetryableException e) {
                    log.warn("Retryable error syncing.  Retrying.", e);
                } catch (ContainedException e) {
                    log.warn("Contained error syncing.  Giving up on {}", change.entityId(), e);
                    throw e;
                }
            }
        }));
    }
    List<Change> processedChanges = new ArrayList<>();
    for (Future<Change> f : futureChanges) {
        try {
            processedChanges.add(f.get());
        } catch (ExecutionException e) {
            // Contained failures were already logged by the task itself. Anything
            // else escaped the task without a log line, so record it here instead
            // of dropping the change silently.
            if (!(e.getCause() instanceof ContainedException)) {
                log.error("Unexpected error syncing.  Skipping change.", e);
            }
        }
    }
    log.debug("Preparing update data took {} ms, have {} changes", System.currentTimeMillis() - start, processedChanges.size());
    rdfRepository.syncFromChanges(processedChanges, verify);
    updateMeter.mark(processedChanges.size());
}
Also used : RetryableException(org.wikidata.query.rdf.tool.exception.RetryableException) ArrayList(java.util.ArrayList) Future(java.util.concurrent.Future) Change(org.wikidata.query.rdf.tool.change.Change) ContainedException(org.wikidata.query.rdf.tool.exception.ContainedException) ExecutionException(java.util.concurrent.ExecutionException)

Example 2 with ContainedException

use of org.wikidata.query.rdf.tool.exception.ContainedException in project wikidata-query-rdf by wikimedia.

The class RdfRepository, method execute.

/**
 * Run a piece of raw SPARQL against the triple store.
 *
 * @param type name of the parameter in which to send sparql
 * @param responseHandler provides the accept header for the request and
 *            parses the server's response
 * @param sparql the SPARQL text to send
 * @param <T> type produced by the response handler
 * @return whatever the response handler parses out of the response
 */
protected <T> T execute(String type, ResponseHandler<T> responseHandler, String sparql) {
    log.trace("Running SPARQL: {}", sparql);
    final long startedAt = System.currentTimeMillis();
    // TODO we might want to look into Blazegraph's incremental update
    // reporting.....
    try {
        ContentResponse reply = retryer.call(() -> makeRequest(type, sparql, responseHandler.acceptHeader()).send());
        boolean succeeded = reply.getStatus() == HttpStatus.OK_200;
        if (!succeeded) {
            // Anything but a plain 200 is treated as a contained failure.
            throw new ContainedException("Non-200 response from triple store:  " + reply + " body=\n" + responseBodyAsString(reply));
        }
        long elapsedMs = System.currentTimeMillis() - startedAt;
        log.debug("Completed in {} ms", elapsedMs);
        return responseHandler.parse(reply);
    } catch (ExecutionException | RetryException | IOException e) {
        throw new FatalException("Error updating triple store", e);
    }
}
Also used : ContentResponse(org.eclipse.jetty.client.api.ContentResponse) FatalException(org.wikidata.query.rdf.tool.exception.FatalException) ContainedException(org.wikidata.query.rdf.tool.exception.ContainedException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) RetryException(com.github.rholder.retry.RetryException)

Example 3 with ContainedException

use of org.wikidata.query.rdf.tool.exception.ContainedException in project wikidata-query-rdf by wikimedia.

The class WikibaseRepository, method fetchRdfForEntity.

/**
 * Fetch the RDF statements describing an entity.
 *
 * @param entityId id of the entity to fetch
 * @return the statements parsed from the response, or an empty list when
 *         the server answers 404
 * @throws RetryableException thrown if there is an error communicating with
 *             wikibase
 */
public Collection<Statement> fetchRdfForEntity(String entityId) throws RetryableException {
    // TODO handle ?flavor=dump or whatever parameters we need
    final URI uri = uris.rdf(entityId);
    final long startedAt = System.currentTimeMillis();
    log.debug("Fetching rdf from {}", uri);
    StatementCollector statements = new StatementCollector();
    RDFParser turtleParser = Rio.createParser(RDFFormat.TURTLE);
    turtleParser.setRDFHandler(new NormalizingRdfHandler(statements));
    HttpGet get = new HttpGet(uri);
    get.setConfig(configWithTimeout);
    try (CloseableHttpResponse response = client.execute(get)) {
        int status = response.getStatusLine().getStatusCode();
        if (status == 404) {
            // A delete/nonexistent page
            return Collections.emptyList();
        }
        if (status >= 300) {
            throw new ContainedException("Unexpected status code fetching RDF for " + uri + ":  " + status);
        }
        InputStreamReader body = new InputStreamReader(response.getEntity().getContent(), Charsets.UTF_8);
        turtleParser.parse(body, uri.toString());
    } catch (UnknownHostException | SocketException | SSLHandshakeException e) {
        // We want to bail on this, since it happens to be sticky for some reason
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RetryableException("Error fetching RDF for " + uri, e);
    } catch (RDFParseException | RDFHandlerException e) {
        throw new ContainedException("RDF parsing error for " + uri, e);
    }
    long elapsedMs = System.currentTimeMillis() - startedAt;
    log.debug("Done in {} ms", elapsedMs);
    return statements.getStatements();
}
Also used : SocketException(java.net.SocketException) InputStreamReader(java.io.InputStreamReader) UnknownHostException(java.net.UnknownHostException) StatementCollector(org.openrdf.rio.helpers.StatementCollector) HttpGet(org.apache.http.client.methods.HttpGet) ContainedException(org.wikidata.query.rdf.tool.exception.ContainedException) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) RDFParser(org.openrdf.rio.RDFParser) URI(java.net.URI) SSLHandshakeException(javax.net.ssl.SSLHandshakeException) NormalizingRdfHandler(org.wikidata.query.rdf.tool.rdf.NormalizingRdfHandler) RetryableException(org.wikidata.query.rdf.tool.exception.RetryableException) RDFHandlerException(org.openrdf.rio.RDFHandlerException) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) RDFParseException(org.openrdf.rio.RDFParseException)

Example 4 with ContainedException

use of org.wikidata.query.rdf.tool.exception.ContainedException in project wikidata-query-rdf by wikimedia.

The class WikibaseRepository, method collectStatementsFromUrl.

/**
 * Fetch one URL and feed the statements parsed from it into the collector.
 *
 * @param uri address to fetch
 * @param collector receives the parsed statements
 * @param timer times the whole fetch-and-parse
 * @throws RetryableException if there's a retryable error
 */
private void collectStatementsFromUrl(URI uri, StatementCollector collector, Timer timer) throws RetryableException {
    RDFParser rdfParser = this.rdfParserSupplier.get(collector);
    HttpGet get = new HttpGet(uri);
    log.debug("Fetching rdf from {}", uri);
    // Resources close in reverse order: the response first, then the timer context.
    try (Timer.Context timing = timer.time();
            CloseableHttpResponse response = client.execute(get)) {
        int status = response.getStatusLine().getStatusCode();
        if (status == 404) {
            throw new WikibaseEntityFetchException(uri, WikibaseEntityFetchException.Type.ENTITY_NOT_FOUND);
        }
        if (status == 204) {
            throw new WikibaseEntityFetchException(uri, WikibaseEntityFetchException.Type.NO_CONTENT);
        }
        if (status >= 300) {
            throw new WikibaseEntityFetchException(uri, WikibaseEntityFetchException.Type.UNEXPECTED_RESPONSE);
        }
        try (InputStream body = streamDumper.wrap(getInputStream(response))) {
            if (body == null) {
                throw new WikibaseEntityFetchException(uri, WikibaseEntityFetchException.Type.EMPTY_RESPONSE);
            }
            InputStreamReader reader = new InputStreamReader(body, UTF_8);
            rdfParser.parse(reader, uri.toString());
        }
    } catch (UnknownHostException | SocketException | SSLHandshakeException e) {
        // We want to bail on this, since it happens to be sticky for some reason
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RetryableException("Error fetching RDF for " + uri, e);
    } catch (RDFParseException | RDFHandlerException e) {
        throw new ContainedException("RDF parsing error for " + uri, e);
    }
}
Also used : SocketException(java.net.SocketException) InputStreamReader(java.io.InputStreamReader) UnknownHostException(java.net.UnknownHostException) PushbackInputStream(java.io.PushbackInputStream) InputStream(java.io.InputStream) HttpGet(org.apache.http.client.methods.HttpGet) IOException(java.io.IOException) ContainedException(org.wikidata.query.rdf.tool.exception.ContainedException) RDFParser(org.openrdf.rio.RDFParser) SSLHandshakeException(javax.net.ssl.SSLHandshakeException) RetryableException(org.wikidata.query.rdf.tool.exception.RetryableException) Timer(com.codahale.metrics.Timer) RDFHandlerException(org.openrdf.rio.RDFHandlerException) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) RDFParseException(org.openrdf.rio.RDFParseException)

Example 5 with ContainedException

use of org.wikidata.query.rdf.tool.exception.ContainedException in project wikidata-query-rdf by wikimedia.

The class WikibaseRepository, method fetchRdfForEntity.

/**
 * Fetch the RDF for some entity, optionally followed by its constraints.
 * If revision is good (above 0) it will fetch by revision.
 *
 * @param entityId id of the entity to fetch
 * @param revision revision to fetch
 * @return all statements collected for the entity
 * @throws RetryableException thrown if there is an error communicating with
 *             wikibase
 */
// stop() and close() are the same
@SuppressWarnings("resource")
public Collection<Statement> fetchRdfForEntity(String entityId, long revision) throws RetryableException {
    final Timer.Context overallTiming = rdfFetchTimer.time();
    final StatementCollector statements = new StatementCollector();
    collectStatementsFromUrl(uris.rdf(entityId, revision), statements, entityFetchTimer);
    if (collectConstraints) {
        try {
            // TODO: constraints should probably handled by its own update pipeline
            // and possibly be stored in a dedicated graph
            // Re-using the same error detection patterns seems suspicious
            collectStatementsFromUrl(uris.constraints(entityId), statements, constraintFetchTimer);
        } catch (ContainedException constraintFailure) {
            // TODO: add RetryableException here?
            // Skip loading constraints on fail, it's not the reason to give up
            // on the whole item.
            log.info("Failed to load constraints: {}", constraintFailure.getMessage());
        }
    }
    // stop() returns a long that is divided by one million for the "ms" figure.
    long elapsed = overallTiming.stop();
    log.debug("Done in {} ms", elapsed / 1_000_000);
    return statements.getStatements();
}
Also used : Timer(com.codahale.metrics.Timer) StatementCollector(org.openrdf.rio.helpers.StatementCollector) ContainedException(org.wikidata.query.rdf.tool.exception.ContainedException)

Aggregations

ContainedException (org.wikidata.query.rdf.tool.exception.ContainedException)9 IOException (java.io.IOException)4 ExecutionException (java.util.concurrent.ExecutionException)4 RetryableException (org.wikidata.query.rdf.tool.exception.RetryableException)4 ArrayList (java.util.ArrayList)3 Timer (com.codahale.metrics.Timer)2 RetryException (com.github.rholder.retry.RetryException)2 InputStreamReader (java.io.InputStreamReader)2 SocketException (java.net.SocketException)2 UnknownHostException (java.net.UnknownHostException)2 Future (java.util.concurrent.Future)2 SSLHandshakeException (javax.net.ssl.SSLHandshakeException)2 CloseableHttpResponse (org.apache.http.client.methods.CloseableHttpResponse)2 HttpGet (org.apache.http.client.methods.HttpGet)2 ContentResponse (org.eclipse.jetty.client.api.ContentResponse)2 Statement (org.openrdf.model.Statement)2 RDFHandlerException (org.openrdf.rio.RDFHandlerException)2 RDFParseException (org.openrdf.rio.RDFParseException)2 RDFParser (org.openrdf.rio.RDFParser)2 StatementCollector (org.openrdf.rio.helpers.StatementCollector)2