Search in sources :

Example 6 with ContainedException

use of org.wikidata.query.rdf.tool.exception.ContainedException in project wikidata-query-rdf by wikimedia.

the class EntityMungingRdfHandler method munge.

/**
 * Munges the statements buffered for the current entity, forwards each of them
 * to the output handler and then resets the per-entity state.
 *
 * <p>A {@code ContainedException} raised while doing so is logged as a warning
 * and not rethrown; the buffered statements and the non-entity-data flag are
 * reset in that case as well.
 *
 * @throws RDFHandlerException if there is an error syncing it
 */
private void munge() throws RDFHandlerException {
    // A progress line is logged each time the processed count is a multiple of this.
    final int progressLogInterval = 10000;
    try {
        log.debug("Munging {}", entityId);
        munger.munge(entityId, statements);
        for (Statement munged : statements) {
            output.handleStatement(munged);
        }
        entitiesMeter.mark();
        final boolean atProgressInterval = entitiesMeter.getCount() % progressLogInterval == 0;
        if (atProgressInterval) {
            log.info("Processed {} entities at ({}, {}, {})",
                    entitiesMeter.getCount(),
                    (long) entitiesMeter.getOneMinuteRate(),
                    (long) entitiesMeter.getFiveMinuteRate(),
                    (long) entitiesMeter.getFifteenMinuteRate());
        }
        entityMetricConsumer.entitiesProcessed(entitiesMeter.getCount());
    } catch (ContainedException e) {
        // Contained errors only affect this entity: report it and carry on.
        log.warn("Error munging {}", entityId, e);
    }
    // Reset the per-entity state so the next entity starts from scratch.
    statements.clear();
    haveNonEntityDataStatements = false;
}
Also used : Statement(org.openrdf.model.Statement) StatementPredicates.dumpStatement(org.wikidata.query.rdf.tool.rdf.StatementPredicates.dumpStatement) ContainedException(org.wikidata.query.rdf.tool.exception.ContainedException)

Example 7 with ContainedException

use of org.wikidata.query.rdf.tool.exception.ContainedException in project wikidata-query-rdf by wikimedia.

the class RdfClient method execute.

/**
 * Sends a raw SPARQL string to the triple store and parses the reply.
 *
 * @param type name of the parameter in which to send sparql
 * @param responseHandler supplies the accept header for the request and
 *            parses the response
 * @param sparql the SPARQL text to send
 * @param <T> the type into which the result is parsed
 * @return parsed results from the server
 * @throws ContainedException if the response status is not {@code OK_200}
 * @throws FatalException if sending the request or parsing the response
 *             fails with an ExecutionException, RetryException or IOException
 */
private <T> T execute(String type, ResponseHandler<T> responseHandler, String sparql) {
    log.trace("Running SPARQL: [{}] {}", sparql.length(), sparql);
    final long startedAt = System.currentTimeMillis();
    // TODO we might want to look into Blazegraph's incremental update
    // reporting.....
    try {
        final ContentResponse reply = retryer.call(() -> makeRequest(type, sparql, responseHandler.acceptHeader()).send());
        final boolean succeeded = reply.getStatus() == OK_200;
        if (!succeeded) {
            // Not one of the types caught below, so this propagates to the caller as-is.
            throw new ContainedException("Non-200 response from triple store:  " + reply + " body=\n" + reply.getContentAsString());
        }
        final long elapsedMillis = System.currentTimeMillis() - startedAt;
        log.debug("Completed in {} ms", elapsedMillis);
        return responseHandler.parse(reply);
    } catch (ExecutionException | RetryException | IOException e) {
        throw new FatalException("Error accessing triple store", e);
    }
}
Also used : ContentResponse(org.eclipse.jetty.client.api.ContentResponse) FatalException(org.wikidata.query.rdf.tool.exception.FatalException) ContainedException(org.wikidata.query.rdf.tool.exception.ContainedException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) RetryException(com.github.rholder.retry.RetryException)

Example 8 with ContainedException

use of org.wikidata.query.rdf.tool.exception.ContainedException in project wikidata-query-rdf by wikimedia.

the class Updater method fetchDataFromWikibaseAndMunge.

/**
 * Runs {@code handleChange} for every change in {@code trueChanges}, one
 * executor task per change, and collects the changes that were handled
 * successfully.
 *
 * <p>Inside each task a {@code RetryableException} is logged and the change is
 * tried again; a {@code ContainedException} is logged and ends the task.
 * Changes whose task failed are left out of the result. Failures other than
 * {@code ContainedException} are logged here, since the task itself does not
 * log them.
 *
 * @param trueChanges the changes to process, together with the values and
 *            references looked up per entity URI
 * @return the successfully processed changes, in submission order
 * @throws InterruptedException if interrupted while waiting for a task
 */
private List<Change> fetchDataFromWikibaseAndMunge(ChangesWithValuesAndRefs trueChanges) throws InterruptedException {
    List<Future<Change>> futureChanges = new ArrayList<>();
    for (Change change : trueChanges.changes) {
        futureChanges.add(executor.submit(() -> {
            // Keep trying until the change is handled or fails with a non-retryable error.
            while (true) {
                try {
                    String entityURI = uris.entityIdToURI(change.entityId());
                    Set<String> existingValues = trueChanges.repoValues.get(entityURI);
                    Set<String> existingRefs = trueChanges.repoRefs.get(entityURI);
                    handleChange(change, existingValues, existingRefs);
                    return change;
                } catch (RetryableException e) {
                    log.warn("Retryable error syncing.  Retrying.", e);
                } catch (ContainedException e) {
                    log.warn("Contained error syncing.  Giving up on {}", change.entityId(), e);
                    throw e;
                }
            }
        }));
    }
    List<Change> processedChanges = new ArrayList<>(futureChanges.size());
    for (Future<Change> f : futureChanges) {
        try {
            processedChanges.add(f.get());
        } catch (ExecutionException e) {
            // A ContainedException has already been logged by the task itself.
            // Anything else would otherwise disappear without a trace, so
            // report it here; the change is still dropped, as before.
            if (!(e.getCause() instanceof ContainedException)) {
                log.warn("Unexpected error syncing.  Dropping change.", e);
            }
        }
    }
    return processedChanges;
}
Also used : HashSet(java.util.HashSet) EnumSet(java.util.EnumSet) Set(java.util.Set) RetryableException(org.wikidata.query.rdf.tool.exception.RetryableException) ArrayList(java.util.ArrayList) Future(java.util.concurrent.Future) Change(org.wikidata.query.rdf.tool.change.Change) ContainedException(org.wikidata.query.rdf.tool.exception.ContainedException) ExecutionException(java.util.concurrent.ExecutionException)

Example 9 with ContainedException

use of org.wikidata.query.rdf.tool.exception.ContainedException in project wikidata-query-rdf by wikimedia.

the class Updater method handleChange.

/**
 * Handle a change.
 * <ul>
 * <li>Fetch the RDF for the change from Wikibase; fetch errors whose type is
 * in {@code DELETE_ENTITY_ERROR_TYPE} yield an empty statement list instead.
 * <li>When {@code verify} is set, warn about entity statements without rank.
 * <li>If any statements were fetched, munge them and compare the fetched
 * revision with the revision of the change: a stale fetch defers the change,
 * a newer fetch is counted as a skip-ahead.
 * <li>Store the statements and the (currently always empty) cleanup lists on
 * the change.
 * </ul>
 *
 * @param change the change to process; it receives the resulting statements
 *            and cleanup lists
 * @param repoValues passed to {@code RdfRepository.extractValuesToCleanup}
 * @param repoRefs passed to {@code RdfRepository.extractReferencesToCleanup}
 * @throws RetryableException if there is a retryable error updating the rdf
 *             store
 * @throws ContainedException if fetching the entity data fails with an error
 *             type that is not in {@code DELETE_ENTITY_ERROR_TYPE}
 */
private void handleChange(Change change, Set<String> repoValues, Set<String> repoRefs) throws RetryableException {
    log.debug("Processing data for {}", change);
    Collection<Statement> statements;
    try {
        statements = wikibase.fetchRdfForEntity(change);
    } catch (WikibaseEntityFetchException e) {
        final boolean treatAsDeleted = DELETE_ENTITY_ERROR_TYPE.contains(e.getErrorType());
        if (!treatAsDeleted) {
            throw new ContainedException("Received un-recoverable error fetching entity data for " + change.entityId(), e);
        }
        log.debug("Cannot fetch entity (deleting entity): ", e);
        statements = new ArrayList<>();
    }

    if (verify) {
        final Set<String> unranked = statements.stream().collect(entityStatementsWithoutRank());
        final boolean hasUnranked = !unranked.isEmpty();
        if (hasUnranked) {
            log.warn("Found some statements without ranks while processing {}: {}", change.entityId(), unranked);
        }
    }

    Set<String> valueCleanup = Collections.emptySet();
    Set<String> refCleanup = Collections.emptySet();
    // If we've got no statements, we have no usable loaded data, so no point in checking
    final boolean haveData = !statements.isEmpty();
    if (haveData) {
        valueCleanup = RdfRepository.extractValuesToCleanup(repoValues, statements);
        refCleanup = RdfRepository.extractReferencesToCleanup(repoRefs, statements);
        final long fetchedRev = munger.munge(change.entityId(), statements);
        final long sourceRev = change.revision();
        // Revisions are only compared when both are positive
        final boolean comparable = sourceRev > 0 && fetchedRev > 0;
        if (comparable && fetchedRev < sourceRev) {
            // Something weird happened - we've got stale revision!
            log.warn("Stale revision on {}: change is {}, RDF is {}", change.entityId(), sourceRev, fetchedRev);
            metricsRepository.incDeferredChanges();
            deferredChanges.add(change, DEFERRAL_DELAY);
        } else if (comparable && fetchedRev > sourceRev) {
            // We skipped some revisions, let's count it in meter
            metricsRepository.markSkipAhead();
        }
    }

    /*
     * TODO: we temporarily keep all the ref data because of the issues
     * in https://phabricator.wikimedia.org/T194325
     * see Change-ID Ia6c68a5b93e8c9a35310892904819c956ca9cd95
     * or git commit hash 2931b5af725b7ab341dd60920710619fa249d1f2
     * for more context
     */
    refCleanup = Collections.emptySet();
    change.setRefCleanupList(refCleanup);

    /*
     * TODO: we disable values cleanup to measure the impact on the lag
     *  see: T249196
     */
    valueCleanup = Collections.emptySet();
    change.setValueCleanupList(valueCleanup);

    change.setStatements(statements);
}
Also used : Statement(org.openrdf.model.Statement) ArrayList(java.util.ArrayList) ContainedException(org.wikidata.query.rdf.tool.exception.ContainedException) WikibaseEntityFetchException(org.wikidata.query.rdf.tool.wikibase.WikibaseEntityFetchException)

Aggregations

ContainedException (org.wikidata.query.rdf.tool.exception.ContainedException)9 IOException (java.io.IOException)4 ExecutionException (java.util.concurrent.ExecutionException)4 RetryableException (org.wikidata.query.rdf.tool.exception.RetryableException)4 ArrayList (java.util.ArrayList)3 Timer (com.codahale.metrics.Timer)2 RetryException (com.github.rholder.retry.RetryException)2 InputStreamReader (java.io.InputStreamReader)2 SocketException (java.net.SocketException)2 UnknownHostException (java.net.UnknownHostException)2 Future (java.util.concurrent.Future)2 SSLHandshakeException (javax.net.ssl.SSLHandshakeException)2 CloseableHttpResponse (org.apache.http.client.methods.CloseableHttpResponse)2 HttpGet (org.apache.http.client.methods.HttpGet)2 ContentResponse (org.eclipse.jetty.client.api.ContentResponse)2 Statement (org.openrdf.model.Statement)2 RDFHandlerException (org.openrdf.rio.RDFHandlerException)2 RDFParseException (org.openrdf.rio.RDFParseException)2 RDFParser (org.openrdf.rio.RDFParser)2 StatementCollector (org.openrdf.rio.helpers.StatementCollector)2