Use of org.wikidata.query.rdf.tool.exception.RetryableException in project wikidata-query-rdf by wikimedia.
The class Updater, method handleChanges.
/**
 * Handle the changes in a batch.
 *
 * @throws InterruptedException if the process is interrupted while waiting
 *      on changes to sync
 * @throws ExecutionException if there is an error syncing any of the
 *      changes
 */
protected void handleChanges(Iterable<Change> changes) throws InterruptedException, ExecutionException {
    Set<Change> trueChanges = getRevisionUpdates(changes);
    long start = System.currentTimeMillis();
    List<Future<Change>> futureChanges = new ArrayList<>();
    for (Change change : trueChanges) {
        futureChanges.add(executor.submit(() -> {
            while (true) {
                try {
                    handleChange(change);
                    return change;
                } catch (RetryableException e) {
                    log.warn("Retryable error syncing. Retrying.", e);
                } catch (ContainedException e) {
                    log.warn("Contained error syncing. Giving up on {}", change.entityId(), e);
                    throw e;
                }
            }
        }));
    }
    List<Change> processedChanges = new ArrayList<>();
    for (Future<Change> f : futureChanges) {
        try {
            processedChanges.add(f.get());
        } catch (ExecutionException ignore) {
            // failure has already been logged
        }
    }
    log.debug("Preparing update data took {} ms, have {} changes", System.currentTimeMillis() - start, processedChanges.size());
    rdfRepository.syncFromChanges(processedChanges, verify);
    updateMeter.mark(processedChanges.size());
}
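This loop is the core contract around RetryableException: a retryable failure keeps looping until handleChange succeeds, while a ContainedException abandons only that one change. Below is a minimal standalone sketch of the same pattern; it is not part of the project, and the helper class name and generic Callable task are illustrative only.
import java.util.concurrent.Callable;

import org.wikidata.query.rdf.tool.exception.ContainedException;
import org.wikidata.query.rdf.tool.exception.RetryableException;

final class RetryUntilSuccess {
    private RetryUntilSuccess() {
    }

    /** Run the task until it succeeds; a ContainedException gives up on this task only. */
    static <T> T run(Callable<T> task) throws Exception {
        while (true) {
            try {
                return task.call();
            } catch (RetryableException e) {
                // Transient failure: loop and try again.
            } catch (ContainedException e) {
                // Failure scoped to this one item: stop retrying and surface it to the caller.
                throw e;
            }
        }
    }
}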
Use of org.wikidata.query.rdf.tool.exception.RetryableException in project wikidata-query-rdf by wikimedia.
The class KafkaPoller, method fetch.
/**
 * Fetch changes from Kafka.
 *
 * @param lastNextStartTime where the last fetch ended up
 * @return a batch of accumulated changes
 * @throws RetryableException if there is a recoverable error while polling Kafka
 */
@SuppressWarnings({ "checkstyle:npathcomplexity", "checkstyle:cyclomaticcomplexity" })
private Batch fetch(Instant lastNextStartTime) throws RetryableException {
Map<String, Change> changesByTitle = new LinkedHashMap<>();
ConsumerRecords<String, ChangeEvent> records;
Map<String, Instant> timesByTopic = newHashMapWithExpectedSize(topics.size());
while (true) {
try {
// TODO: make timeout configurable? Wait for a bit so we catch bursts of messages?
records = consumer.poll(1000);
} catch (InterruptException | WakeupException e) {
throw new RetryableException("Error fetching recent changes", e);
}
int count = records.count();
log.info("Fetched {} records from Kafka", count);
if (count == 0) {
// If we got nothing from Kafka, get out of the loop and return what we have
break;
}
boolean foundSomething = false;
for (ConsumerRecord<String, ChangeEvent> record : records) {
ChangeEvent event = record.value();
String topic = record.topic();
log.debug("Got event t:{} o:{}", record.topic(), record.offset());
if (!event.domain().equals(uris.getHost())) {
// wrong domain, ignore
continue;
}
// check namespace
if (!uris.isEntityNamespace(event.namespace())) {
continue;
}
if (event.isRedundant()) {
// This is a redundant event, we can skip it.
continue;
}
// Now we have event that we want to process
foundSomething = true;
// Keep max time per topic
timesByTopic.put(topic, Utils.max(Instant.ofEpochMilli(record.timestamp()), timesByTopic.getOrDefault(topic, null)));
// Using offset here as RC id since we do not have real RC id (this not being RC poller) but
// the offset serves the same function in Kafka and is also useful for debugging.
Change change = new Change(event.title(), event.revision(), event.timestamp(), record.offset());
Change dupe = changesByTitle.put(change.entityId(), change);
// This is not a big deal since deletes are relatively rare.
if (dupe != null && change.revision() > Change.NO_REVISION && (dupe.revision() > change.revision() || dupe.revision() == Change.NO_REVISION)) {
// need to remove so that order will be correct
changesByTitle.remove(change.entityId());
changesByTitle.put(change.entityId(), dupe);
}
}
if (changesByTitle.size() >= batchSize) {
// We have enough for the batch
break;
}
if (changesByTitle.size() > 0 && !foundSomething) {
log.info("Did not find anything useful in this batch, returning existing data");
// wait for more.
break;
}
// TODO: if we already have something and we've spent more than X seconds in the loop,
// we probably should return without waiting for more
}
// Here we are using min, not max, since some topics may be lagging behind
// and we should catch them up if we are restarted.
// Note that this means we could get repeated items from more advanced topics,
// but that's ok, since changes are checked by revid anyway.
// This is important only on updater restart, otherwise Kafka offsets should
// take care of tracking things.
Instant nextInstant = timesByTopic.values().stream().min(Instant::compareTo).orElse(lastNextStartTime);
// FIXME: Note that due to batching nature of Kafka this timestamp could actually jump
// back and forth. Not sure what to do about it.
final ImmutableList<Change> changes = ImmutableList.copyOf(changesByTitle.values());
log.info("Found {} changes", changes.size());
long advanced = ChronoUnit.MILLIS.between(lastNextStartTime, nextInstant);
// be sure we got the whole second
return new Batch(changes, advanced, nextInstant.minusSeconds(1).toString(), nextInstant);
}
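The deduplication step in the middle of the loop is easy to miss: for a given title the newer revision wins, except that a delete (NO_REVISION) always wins because it carries no revision of its own. Here is a standalone sketch of just that rule, using a simplified stand-in type instead of the project's Change class; the Event class and the NO_REVISION constant below are illustrative.
import java.util.LinkedHashMap;
import java.util.Map;

final class DedupByTitleSketch {
    /** Stand-in for Change.NO_REVISION; the real constant lives in the project. */
    static final long NO_REVISION = -1;

    /** Simplified stand-in for Change: just a title and a revision. */
    static final class Event {
        final String title;
        final long revision;

        Event(String title, long revision) {
            this.title = title;
            this.revision = revision;
        }

        @Override
        public String toString() {
            return title + "@" + revision;
        }
    }

    static void add(Map<String, Event> byTitle, Event change) {
        Event dupe = byTitle.put(change.title, change);
        if (dupe != null && change.revision > NO_REVISION
                && (dupe.revision > change.revision || dupe.revision == NO_REVISION)) {
            // The earlier event was newer, or was a delete: put it back, at the end,
            // so the map's insertion order still reflects the winning event.
            byTitle.remove(change.title);
            byTitle.put(change.title, dupe);
        }
    }

    public static void main(String[] args) {
        Map<String, Event> byTitle = new LinkedHashMap<>();
        add(byTitle, new Event("Q42", 100));
        add(byTitle, new Event("Q42", 90));          // older revision loses
        add(byTitle, new Event("Q64", NO_REVISION)); // a delete...
        add(byTitle, new Event("Q64", 120));         // ...wins over a revision for the same title
        System.out.println(byTitle.values());        // [Q42@100, Q64@-1]
    }
}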
Use of org.wikidata.query.rdf.tool.exception.RetryableException in project wikidata-query-rdf by wikimedia.
The class WikibaseRepository, method delete.
/**
 * Delete an entity from the repository.
 *
 * @param entityId id of the entity to delete
 * @throws RetryableException thrown if there is an error communicating with
 *      Wikibase
 */
public void delete(String entityId) throws RetryableException {
    URI uri = uris.delete(entityId);
    log.debug("Deleting entity {} using {}", entityId, uri);
    try {
        DeleteResponse result = checkApi(getJson(postWithToken(uri), DeleteResponse.class));
        log.debug("Deleted: {}", result);
    } catch (IOException e) {
        throw new RetryableException("Error deleting page", e);
    }
}
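Because delete only reports transient failures through RetryableException, callers choose their own retry policy. A hedged sketch of one possible caller, not from the project; the helper name, attempt count, and backoff are illustrative.
import org.wikidata.query.rdf.tool.exception.RetryableException;
import org.wikidata.query.rdf.tool.wikibase.WikibaseRepository;

final class DeleteWithRetry {
    private DeleteWithRetry() {
    }

    /** Delete entityId, retrying on RetryableException up to maxAttempts times. */
    static void delete(WikibaseRepository repo, String entityId, int maxAttempts)
            throws RetryableException, InterruptedException {
        for (int attempt = 1; ; attempt++) {
            try {
                repo.delete(entityId);
                return;
            } catch (RetryableException e) {
                if (attempt >= maxAttempts) {
                    throw e;                    // out of retries, surface the failure
                }
                Thread.sleep(1000L * attempt);  // simple linear backoff between attempts
            }
        }
    }
}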
Use of org.wikidata.query.rdf.tool.exception.RetryableException in project wikidata-query-rdf by wikimedia.
The class WikibaseRepository, method setLabel.
/**
 * Edits or creates a page by setting a label. Used for testing.
 *
 * @param entityId id of the entity - if null then a new entity will be created
 * @param type type of entity to create or edit
 * @param label label of the page to create
 * @param language language of the label to add
 * @return the entityId
 * @throws RetryableException thrown if there is an error communicating with
 *      Wikibase
 */
@SuppressWarnings("unchecked")
public String setLabel(String entityId, String type, String label, String language) throws RetryableException {
String datatype = type.equals("property") ? "string" : null;
EditRequest data = new EditRequest(datatype, ImmutableMap.of(language, new Label(language, label)));
try {
URI uri = uris.edit(entityId, type, mapper.writeValueAsString(data));
log.debug("Editing entity using {}", uri);
EditResponse result = checkApi(getJson(postWithToken(uri), EditResponse.class));
return result.getEntity().getId();
} catch (IOException e) {
throw new RetryableException("Error adding page", e);
}
}
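A hedged usage sketch of the helper above: passing a null entityId creates a new entity, and the type string decides whether a datatype is sent (only properties get the "string" datatype). The wrapper class and label values are illustrative, not part of the project.
import org.wikidata.query.rdf.tool.exception.RetryableException;
import org.wikidata.query.rdf.tool.wikibase.WikibaseRepository;

final class CreateTestEntities {
    private CreateTestEntities() {
    }

    /** Create a brand-new item with an English label and return its freshly minted id. */
    static String newItem(WikibaseRepository repo, String label) throws RetryableException {
        // A null entityId asks setLabel to create the entity rather than edit an existing one.
        return repo.setLabel(null, "item", label, "en");
    }

    /** Create a new property; setLabel supplies the "string" datatype for properties. */
    static String newStringProperty(WikibaseRepository repo, String label) throws RetryableException {
        return repo.setLabel(null, "property", label, "en");
    }
}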
Use of org.wikidata.query.rdf.tool.exception.RetryableException in project wikidata-query-rdf by wikimedia.
The class WikibaseRepository, method firstEntityIdForLabelStartingWith.
/**
 * Get the first entity id with the provided label in the provided language.
 *
 * @param label label to search for
 * @param language language of the label
 * @param type type of entity to search for
 * @return the id of the first matching entity, or null if nothing matches
 * @throws RetryableException thrown if there is an error communicating with
 *      Wikibase
 */
public String firstEntityIdForLabelStartingWith(String label, String language, String type) throws RetryableException {
    URI uri = uris.searchForLabel(label, language, type);
    log.debug("Searching for entity using {}", uri);
    try {
        SearchResponse result = checkApi(getJson(new HttpGet(uri), SearchResponse.class));
        List<SearchResult> resultList = result.getSearch();
        if (resultList.isEmpty()) {
            return null;
        }
        return resultList.get(0).getId();
    } catch (IOException e) {
        throw new RetryableException("Error searching for page", e);
    }
}
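The two test helpers above are naturally used together: create an entity with setLabel, then confirm the search endpoint can find it by label. A hedged round-trip sketch with illustrative names; it makes no claim about how quickly the search index catches up with an edit.
import org.wikidata.query.rdf.tool.exception.RetryableException;
import org.wikidata.query.rdf.tool.wikibase.WikibaseRepository;

final class LabelRoundTrip {
    private LabelRoundTrip() {
    }

    /** Create an item with the given label and check whether search already finds it. */
    static boolean labelIsSearchable(WikibaseRepository repo, String label) throws RetryableException {
        String created = repo.setLabel(null, "item", label, "en");
        // firstEntityIdForLabelStartingWith returns null when nothing matches (yet);
        // search indexing can lag behind the edit.
        String found = repo.firstEntityIdForLabelStartingWith(label, "en", "item");
        return created.equals(found);
    }
}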