use of won.protocol.rest.LinkedDataFetchingException in project webofneeds by researchstudio-sat.
the class LinkedDataSourceBase method getDataForResource.
private Dataset getDataForResource(final URI resourceURI, final Optional<URI> requesterWebID, final int maxRequest, final int maxDepth, BiFunction<Dataset, Set<URI>, Set<URI>> findNextUrisFunction) {
Set<URI> crawledURIs = new HashSet<>();
Set<URI> newlyDiscoveredURIs = new HashSet<>();
newlyDiscoveredURIs.add(resourceURI);
int depth = 0;
int requests = 0;
final Dataset dataset = makeDataset();
while (newlyDiscoveredURIs.size() > 0 && depth < maxDepth && requests < maxRequest) {
final Set<URI> urisToCrawl = retainOnlyAllowedAmount(newlyDiscoveredURIs, maxRequest, requests);
// hack: there may be a threadLocal with the authentication data we need further down the call stack;
// if there is one, we need to add it to the threads we use in the following parallel construct
final Optional<Object> authenticationOpt = AuthenticationThreadLocal.hasValue() ? Optional.of(AuthenticationThreadLocal.getAuthentication()) : Optional.empty();
Future<Optional<Dataset>> crawledData = parallelRequestsThreadpool.submit(() -> urisToCrawl.parallelStream().map(uri -> {
try {
if (authenticationOpt.isPresent()) {
// threadlocal hack mentioned above
AuthenticationThreadLocal.setAuthentication(authenticationOpt.get());
}
return requesterWebID.isPresent() ? getDataForResource(uri, requesterWebID.get()) : getDataForPublicResource(uri);
} finally {
// be sure to remove the principal from the threadlocal after the call
AuthenticationThreadLocal.remove();
}
}).reduce(RdfUtils::addDatasetToDataset));
Optional<Dataset> crawledDataset;
try {
crawledDataset = crawledData.get();
} catch (ExecutionException e) {
Throwable cause = e.getCause();
if (cause instanceof LinkedDataFetchingException) {
throw (LinkedDataFetchingException) cause;
}
throw new RuntimeException("Could not retrieve data for multiple URIs", e);
} catch (Exception e) {
throw new RuntimeException("Could not retrieve data for multiple URIs", e);
}
if (crawledDataset.isPresent()) {
// Add crawledDataset to dataset, replacing any named models contained in both.
// We do this because
// 1. merging does not work properly in the presence of blank nodes - they end
// up duplicated
// 2. we do not expect to find the same named model with different content, so
// merging should have no visible effect at all
RdfUtils.addDatasetToDataset(dataset, crawledDataset.get(), true);
}
crawledURIs.addAll(urisToCrawl);
requests += urisToCrawl.size();
newlyDiscoveredURIs = new HashSet<>(findNextUrisFunction.apply(dataset, crawledURIs));
depth++;
logger.debug("current Depth: " + depth);
}
return dataset;
}
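The "threadlocal hack" commented in the method above captures the authentication object from the calling thread and re-installs it on every worker thread of the parallel stream, clearing it again in a finally block. A minimal, standalone sketch of that pattern follows; the AUTH thread-local, the fetchWithAuth function and the dedicated ForkJoinPool are assumptions for illustration only, not the webofneeds implementation.
// Sketch: propagate a ThreadLocal value into the worker threads of a parallel stream.
// Running the stream inside a task submitted to a ForkJoinPool keeps its work in that pool.
import java.util.List;
import java.util.Optional;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.Future;

public class ThreadLocalPropagationSketch {

    private static final ThreadLocal<String> AUTH = new ThreadLocal<>(); // hypothetical auth holder
    private static final ForkJoinPool POOL = new ForkJoinPool(4);

    static Optional<String> fetchAll(List<String> uris) throws Exception {
        // capture the calling thread's value before entering the pool
        final Optional<String> auth = Optional.ofNullable(AUTH.get());
        Future<Optional<String>> result = POOL.submit(() -> uris.parallelStream().map(uri -> {
            try {
                // re-install the captured value on the worker thread
                auth.ifPresent(AUTH::set);
                return fetchWithAuth(uri);
            } finally {
                // always clear the thread-local so pooled threads do not leak credentials
                AUTH.remove();
            }
        }).reduce((a, b) -> a + "\n" + b));
        return result.get();
    }

    // hypothetical fetch that reads the thread-local internally, as the REST client above is assumed to do
    private static String fetchWithAuth(String uri) {
        return "<data for " + uri + ", auth=" + AUTH.get() + ">";
    }
}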
use of won.protocol.rest.LinkedDataFetchingException in project webofneeds by researchstudio-sat.
the class LinkedDataSourceBase method getDataForResource.
@Override
public Dataset getDataForResource(URI resource, final URI requesterWebID) {
if (resource == null || requesterWebID == null) {
throw new IllegalArgumentException("resource and requester must not be null");
}
resource = wonMessageUriResolver.toLocalMessageURI(resource, this);
logger.debug("fetching linked data for URI {} requester {}", resource, requesterWebID);
Dataset dataset = DatasetFactory.createGeneral();
try {
dataset = linkedDataRestClient.readResourceData(resource, requesterWebID);
if (logger.isDebugEnabled()) {
logger.debug("fetched resource {} with requesterWebId {}:", resource, requesterWebID);
RDFDataMgr.write(System.out, dataset, Lang.TRIG);
}
} catch (LinkedDataFetchingException e) {
throw e;
} catch (Exception e) {
logger.debug(String.format("Couldn't fetch resource %s", resource), e);
}
return dataset;
}
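Callers of this method typically let a LinkedDataFetchingException propagate and then inspect its cause for a Spring HttpClientErrorException, as the other snippets on this page do. The following is a hedged sketch of that caller-side pattern; linkedDataSource, resourceUri, requesterWebID and handleDeletedResource are placeholders, not webofneeds code.
// sketch only: handle a deleted (HTTP 410 GONE) linked data resource, mirroring the
// cause-inspection pattern used in SignatureCheckingWonMessageProcessor and WorkerCrawlerActor below
try {
    Dataset data = linkedDataSource.getDataForResource(resourceUri, requesterWebID);
    // ... process the dataset ...
} catch (LinkedDataFetchingException e) {
    Throwable cause = e.getCause();
    if (cause instanceof HttpClientErrorException
            && HttpStatus.GONE.equals(((HttpClientErrorException) cause).getStatusCode())) {
        handleDeletedResource(resourceUri); // hypothetical handler for deleted resources
    } else {
        throw e;
    }
}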
use of won.protocol.rest.LinkedDataFetchingException in project webofneeds by researchstudio-sat.
the class SignatureCheckingWonMessageProcessor method process.
@Override
public WonMessage process(final WonMessage message) throws WonMessageProcessingException {
StopWatch sw = new StopWatch();
try {
SignatureVerificationState result;
/*
* If the message is a successResponse to a delete Message then we can't check
* the signature as it is stored in the deleted Atom, so we just accept the
* message as valid and return it.
*/
if (message.getRespondingToMessageType() == WonMessageType.DELETE && message.getMessageType() == WonMessageType.SUCCESS_RESPONSE) {
return message;
}
for (WonMessage toCheck : message.getAllMessages()) {
try {
// obtain public keys
sw.start("get public keys");
Map<String, PublicKey> keys = WonKeysReaderWriter.readKeyFromMessage(toCheck);
WonMessageType type = toCheck.getMessageType();
switch(type) {
case CREATE_ATOM:
if (keys.isEmpty()) {
throw new WonMessageProcessingException("No key found in CREATE message");
}
break;
case REPLACE:
if (keys.isEmpty()) {
keys.putAll(getRequiredPublicKeys(toCheck.getCompleteDataset()));
}
break;
default:
if (!keys.isEmpty()) {
throw new WonMessageProcessingException(String.format("An Atom key may only be embedded in CREATE or REPLACE messages! Found one in %s message %s", type, message.getMessageURIRequired()));
}
keys.putAll(getRequiredPublicKeys(toCheck.getCompleteDataset()));
}
sw.stop();
// verify with those public keys
sw.start("verify");
result = WonMessageSignerVerifier.verify(keys, toCheck);
sw.stop();
if (logger.isDebugEnabled()) {
logger.debug("VERIFIED=" + result.isVerificationPassed() + " with keys: " + keys.values() + " for\n" + RdfUtils.writeDatasetToString(Prefixer.setPrefixes(toCheck.getCompleteDataset()), Lang.TRIG));
}
} catch (LinkedDataFetchingException e) {
/*
* If a delete message could not be validated because the atom was already
* deleted, we assume that this message is just mirrored back to the owner and
* is to be accepted
*/
if (WonMessageType.DELETE.equals(toCheck.getMessageType())) {
if (e.getCause() instanceof HttpClientErrorException && HttpStatus.GONE.equals(((HttpClientErrorException) e.getCause()).getStatusCode())) {
if (logger.isDebugEnabled()) {
logger.debug("Failure during processing signature check of message" + toCheck.getMessageURI() + " (messageType was DELETE, but atom is already deleted, accept message anyway)");
}
return toCheck;
}
}
// TODO SignatureProcessingException?
throw new WonMessageProcessingException("Could not verify message " + toCheck.getMessageURI(), e);
} catch (Exception e) {
// TODO SignatureProcessingException?
throw new WonMessageProcessingException("Could not verify message " + toCheck.getMessageURI(), e);
}
// throw exception if the verification fails:
if (!result.isVerificationPassed()) {
String errormessage = "Message verification failed. Message:" + toCheck.toStringForDebug(false) + ", Problem:" + result.getMessage();
if (logger.isDebugEnabled()) {
logger.debug(errormessage + ". Offending message:\n" + RdfUtils.toString(Prefixer.setPrefixes(toCheck.getCompleteDataset())));
}
// TODO SignatureProcessingException?
throw new WonMessageProcessingException(new SignatureException(errormessage + ". To log the offending message, set Loglevel to DEBUG for logger '" + this.getClass().getName() + "'"));
}
}
return message;
} finally {
logger.debug(LogMarkers.TIMING, "Signature check for message {} took {} millis, details:\n {}", new Object[] { message.getMessageURIRequired(), sw.getTotalTimeMillis(), sw.prettyPrint() });
}
}
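The check for an HTTP 410 GONE cause on a LinkedDataFetchingException recurs here and in WorkerCrawlerActor below; a small hypothetical helper (not part of webofneeds) could factor it out:
// hypothetical helper: true if the fetch failed because the resource was deleted (HTTP 410 GONE)
private static boolean isResourceGone(LinkedDataFetchingException e) {
    Throwable cause = e.getCause();
    return cause instanceof HttpClientErrorException
            && HttpStatus.GONE.equals(((HttpClientErrorException) cause).getStatusCode());
}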
use of won.protocol.rest.LinkedDataFetchingException in project webofneeds by researchstudio-sat.
the class WorkerCrawlerActor method crawlUri.
private void crawlUri(CrawlUriMessage uriMsg) {
Dataset ds = null;
List<String> etags = null;
Lock lock = null;
try {
// check if resource is already downloaded
if (uriMsg instanceof ResourceCrawlUriMessage) {
ResourceCrawlUriMessage resMsg = ((ResourceCrawlUriMessage) uriMsg);
if (resMsg.getSerializedResource() != null && resMsg.getSerializationFormat() != null) {
// TODO: this should be optimized, why deserialize the resource here when we
// just want to save it in the RDF
// store? How to insert this serialized resource into the SPARQL endpoint?
ds = SparqlService.deserializeDataset(resMsg.getSerializedResource(), resMsg.getSerializationFormat());
}
}
// download resource if not already downloaded
if (ds == null) {
// use ETag/If-None-Match Headers to make the process more efficient
HttpHeaders httpHeaders = new HttpHeaders();
if (uriMsg.getResourceETagHeaderValues() != null && !uriMsg.getResourceETagHeaderValues().isEmpty()) {
String ifNoneMatchHeaderValue = StringUtils.collectionToDelimitedString(uriMsg.getResourceETagHeaderValues(), ", ");
httpHeaders.add("If-None-Match", ifNoneMatchHeaderValue);
}
DatasetResponseWithStatusCodeAndHeaders datasetWithHeaders = linkedDataSource.getDatasetWithHeadersForResource(URI.create(uriMsg.getUri()), httpHeaders);
ds = datasetWithHeaders.getDataset();
etags = datasetWithHeaders.getResponseHeaders().get("ETag");
// if dataset was not modified (304) we can treat the current crawl uri as done
if (ds == null && datasetWithHeaders.getStatusCode() == 304) {
sendDoneUriMessage(uriMsg, uriMsg.getWonNodeUri(), etags);
return;
}
// if there is paging activated and the won node tells us that there is more
// data (previous link)
// to be downloaded, then we add this link to the crawling process too
String prevLink = linkedDataSource.getPreviousLinkFromDatasetWithHeaders(datasetWithHeaders);
if (prevLink != null) {
CrawlUriMessage newUriMsg = new CrawlUriMessage(uriMsg.getBaseUri(), prevLink, uriMsg.getWonNodeUri(), CrawlUriMessage.STATUS.PROCESS, System.currentTimeMillis(), null);
getSender().tell(newUriMsg, getSelf());
}
}
// acquire the dataset's lock before writing to the triple store (ds is expected to be non-null here)
lock = ds.getLock();
lock.enterCriticalSection(true);
// Save dataset to triple store
sparqlService.updateNamedGraphsOfDataset(ds);
String wonNodeUri = extractWonNodeUri(ds, uriMsg.getUri());
if (wonNodeUri == null) {
wonNodeUri = uriMsg.getWonNodeUri();
}
// do nothing more here if the STATUS of the message was SAVE
if (uriMsg.getStatus().equals(CrawlUriMessage.STATUS.SAVE)) {
log.debug("processed crawl uri event {} with status 'SAVE'", uriMsg);
return;
}
// extract URIs from current resource and send extracted URI messages back to
// sender
log.debug("Extract URIs from message {}", uriMsg);
Set<CrawlUriMessage> newCrawlMessages = sparqlService.extractCrawlUriMessages(uriMsg.getBaseUri(), wonNodeUri);
for (CrawlUriMessage newMsg : newCrawlMessages) {
getSender().tell(newMsg, getSelf());
}
// signal sender that this URI is processed and save meta data about crawling
// the URI.
// This needs to be done after all extracted URI messages have been sent to
// guarantee consistency
// in case of failure
sendDoneUriMessage(uriMsg, wonNodeUri, etags);
// if the crawled dataset describes an atom, create and publish an atom event
if (AtomModelWrapper.isAAtom(ds)) {
AtomModelWrapper atomModelWrapper = new AtomModelWrapper(ds, false);
AtomState state = atomModelWrapper.getAtomState();
AtomEvent.TYPE type = state.equals(AtomState.ACTIVE) ? AtomEvent.TYPE.ACTIVE : AtomEvent.TYPE.INACTIVE;
log.debug("Created atom event for atom uri {}", uriMsg.getUri());
long crawlDate = System.currentTimeMillis();
AtomEvent atomEvent = new AtomEvent(uriMsg.getUri(), wonNodeUri, type, crawlDate, ds, Cause.CRAWLED);
pubSubMediator.tell(new DistributedPubSubMediator.Publish(atomEvent.getClass().getName(), atomEvent), getSelf());
}
} catch (RestClientException e1) {
// usually happens if the fetch of the dataset fails e.g.
// HttpServerErrorException, HttpClientErrorException
log.debug("Exception during crawling: " + e1);
throw new CrawlWrapperException(e1, uriMsg);
} catch (LinkedDataFetchingException e) {
log.debug("Exception during crawling: " + e);
Throwable cause = e.getCause();
if (cause instanceof HttpClientErrorException && Objects.equals(((HttpClientErrorException) cause).getStatusCode(), HttpStatus.GONE)) {
log.debug("Uri used to exist, but has been deleted, deleting from rdf store.");
sendDeletedAtomMessage(uriMsg.getUri(), uriMsg.getWonNodeUri());
sendDeletedUriMessage(uriMsg, uriMsg.getWonNodeUri(), etags);
} else if (cause instanceof HttpClientErrorException && Objects.equals(((HttpClientErrorException) cause).getStatusCode(), HttpStatus.FORBIDDEN)) {
log.debug("Not allowed to access uri, marking as done");
sendDoneUriMessage(uriMsg, uriMsg.getWonNodeUri(), etags);
} else {
throw new CrawlWrapperException(e, uriMsg);
}
} catch (Exception e) {
log.debug("Exception during crawling: " + e);
throw new CrawlWrapperException(e, uriMsg);
} finally {
if (lock != null) {
lock.leaveCriticalSection();
}
}
}
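The lock handling in crawlUri follows Jena's critical-section idiom: obtain the Lock from the Dataset, enter the critical section before touching the data, and leave it in a finally block. Below is a minimal sketch of that idiom in isolation; the Consumer stands in for whatever writes the data to the triple store.
// minimal sketch of Jena's critical-section idiom around a dataset update
import java.util.function.Consumer;
import org.apache.jena.query.Dataset;
import org.apache.jena.shared.Lock;

static void updateGuarded(Dataset ds, Consumer<Dataset> updateStore) {
    Lock lock = ds.getLock();
    lock.enterCriticalSection(Lock.READ); // Lock.READ == true, matching enterCriticalSection(true) above
    try {
        updateStore.accept(ds);
    } finally {
        lock.leaveCriticalSection();
    }
}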
use of won.protocol.rest.LinkedDataFetchingException in project webofneeds by researchstudio-sat.
the class LinkedDataSourceBase method getDataForResource.
private Dataset getDataForResource(final URI resourceURI, final URI requesterWebID, final int maxRequest, final int maxDepth, BiFunction<Dataset, Set<URI>, Set<URI>> findNextUrisFunction) {
Set<URI> crawledURIs = new HashSet<URI>();
Set<URI> newlyDiscoveredURIs = new HashSet<URI>();
newlyDiscoveredURIs.add(resourceURI);
int depth = 0;
int requests = 0;
final Dataset dataset = makeDataset();
OUTER: while (newlyDiscoveredURIs.size() > 0 && depth < maxDepth && requests < maxRequest) {
final Set<URI> urisToCrawl = retainOnlyAllowedAmount(newlyDiscoveredURIs, maxRequest, requests);
// hack: there may be a threadLocal with the authentication data we need further down the call stack;
// if there is one, we need to add it to the threads we use in the following parallel construct
final Optional<Object> authenticationOpt = won.protocol.util.AuthenticationThreadLocal.hasValue() ? Optional.of(AuthenticationThreadLocal.getAuthentication()) : Optional.empty();
Future<Optional<Dataset>> crawledData = parallelRequestsThreadpool.submit(() -> urisToCrawl.parallelStream().map(uri -> {
try {
if (authenticationOpt.isPresent()) {
// threadlocal hack mentioned above
AuthenticationThreadLocal.setAuthentication(authenticationOpt.get());
}
return requesterWebID == null ? getDataForResource(uri) : getDataForResource(uri, requesterWebID);
} finally {
// be sure to remove the principal from the threadlocal after the call
AuthenticationThreadLocal.remove();
}
}).reduce((all, current) -> RdfUtils.addDatasetToDataset(all, current)));
Optional<Dataset> crawledDataset;
try {
crawledDataset = crawledData.get();
} catch (ExecutionException e) {
Throwable cause = e.getCause();
if (cause instanceof LinkedDataFetchingException) {
throw (LinkedDataFetchingException) cause;
}
throw new RuntimeException("Could not retrieve data for multiple URIs", e);
} catch (Exception e) {
throw new RuntimeException("Could not retrieve data for multiple URIs", e);
}
if (crawledDataset.isPresent()) {
RdfUtils.addDatasetToDataset(dataset, crawledDataset.get());
}
crawledURIs.addAll(urisToCrawl);
requests += urisToCrawl.size();
newlyDiscoveredURIs = new HashSet<URI>();
newlyDiscoveredURIs.addAll(findNextUrisFunction.apply(dataset, crawledURIs));
depth++;
logger.debug("current Depth: " + depth);
}
return dataset;
}
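The findNextUrisFunction parameter of both crawl variants receives the dataset accumulated so far and the set of already crawled URIs, and returns the URIs to fetch next. An illustrative implementation (not the webofneeds-specific one) could simply collect all object URIs from the dataset and subtract the crawled ones:
// illustration only: a findNextUrisFunction that collects object URIs from the default graph
// and all named graphs, minus the URIs that have already been crawled
BiFunction<Dataset, Set<URI>, Set<URI>> findNextUris = (dataset, crawled) -> {
    Set<URI> next = new HashSet<>();
    dataset.getDefaultModel().listObjects().forEachRemaining(node -> {
        if (node.isURIResource()) {
            next.add(URI.create(node.asResource().getURI()));
        }
    });
    dataset.listNames().forEachRemaining(name ->
        dataset.getNamedModel(name).listObjects().forEachRemaining(node -> {
            if (node.isURIResource()) {
                next.add(URI.create(node.asResource().getURI()));
            }
        }));
    next.removeAll(crawled);
    return next;
};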