use of won.protocol.util.AtomModelWrapper in project webofneeds by researchstudio-sat.
the class WorkerCrawlerActor method crawlUri.
/**
 * Processes a single crawl message.
 * <p>
 * The dataset is taken from the message itself if it carries a serialized
 * resource, otherwise it is downloaded via the linked data source (sending an
 * {@code If-None-Match} header when ETag values are known). The dataset is then
 * written to the triple store, further crawl messages extracted from the store
 * are sent back to the sender, the URI is reported as done and, if the dataset
 * is an atom, an {@link AtomEvent} is published via the pub/sub mediator.
 *
 * @param uriMsg the crawl message describing the URI to process
 * @throws CrawlWrapperException if fetching or processing fails; HTTP 410
 * (GONE) and 403 (FORBIDDEN) causes of a {@code LinkedDataFetchingException}
 * are handled here and do not result in an exception
 */
private void crawlUri(CrawlUriMessage uriMsg) {
    Dataset ds = null;
    List<String> etags = null;
    // only set once the critical section has actually been entered, so that the
    // finally block never leaves a section that was not entered
    Lock lock = null;
    try {
        // check if resource is already downloaded
        if (uriMsg instanceof ResourceCrawlUriMessage) {
            ResourceCrawlUriMessage resMsg = ((ResourceCrawlUriMessage) uriMsg);
            if (resMsg.getSerializedResource() != null && resMsg.getSerializationFormat() != null) {
                // TODO: this should be optimized, why deserialize the resource here when we
                // just want to save it in the RDF
                // store? How to insert this serialized resource into the SPARQL endpoint?
                ds = SparqlService.deserializeDataset(resMsg.getSerializedResource(), resMsg.getSerializationFormat());
            }
        }
        // download resource if not already downloaded
        if (ds == null) {
            // use ETag/If-None-Match Headers to make the process more efficient
            HttpHeaders httpHeaders = new HttpHeaders();
            if (uriMsg.getResourceETagHeaderValues() != null && !uriMsg.getResourceETagHeaderValues().isEmpty()) {
                String ifNoneMatchHeaderValue = StringUtils.collectionToDelimitedString(uriMsg.getResourceETagHeaderValues(), ", ");
                httpHeaders.add("If-None-Match", ifNoneMatchHeaderValue);
            }
            DatasetResponseWithStatusCodeAndHeaders datasetWithHeaders = linkedDataSource.getDatasetWithHeadersForResource(URI.create(uriMsg.getUri()), httpHeaders);
            ds = datasetWithHeaders.getDataset();
            etags = datasetWithHeaders.getResponseHeaders().get("ETag");
            // if dataset was not modified (304) we can treat the current crawl uri as done
            if (ds == null && datasetWithHeaders.getStatusCode() == 304) {
                sendDoneUriMessage(uriMsg, uriMsg.getWonNodeUri(), etags);
                return;
            }
            // if there is paging activated and the won node tells us that there is more
            // data (previous link)
            // to be downloaded, then we add this link to the crawling process too
            String prevLink = linkedDataSource.getPreviousLinkFromDatasetWithHeaders(datasetWithHeaders);
            if (prevLink != null) {
                CrawlUriMessage newUriMsg = new CrawlUriMessage(uriMsg.getBaseUri(), prevLink, uriMsg.getWonNodeUri(), CrawlUriMessage.STATUS.PROCESS, System.currentTimeMillis(), null);
                getSender().tell(newUriMsg, getSelf());
            }
        }
        // Without a dataset there is nothing to store or to extract URIs from. Fail
        // explicitly instead of running into a NullPointerException on the lock; the
        // generic catch below wraps this into a CrawlWrapperException.
        if (ds == null) {
            throw new IllegalStateException("No dataset available for crawl uri " + uriMsg.getUri());
        }
        Lock datasetLock = ds.getLock();
        // NOTE(review): 'true' presumably requests a read lock (Jena Lock.READ) - confirm
        datasetLock.enterCriticalSection(true);
        lock = datasetLock;
        // Save dataset to triple store
        sparqlService.updateNamedGraphsOfDataset(ds);
        String wonNodeUri = extractWonNodeUri(ds, uriMsg.getUri());
        if (wonNodeUri == null) {
            wonNodeUri = uriMsg.getWonNodeUri();
        }
        // do nothing more here if the STATUS of the message was SAVE
        if (uriMsg.getStatus().equals(CrawlUriMessage.STATUS.SAVE)) {
            log.debug("processed crawl uri event {} with status 'SAVE'", uriMsg);
            return;
        }
        // extract URIs from current resource and send extracted URI messages back to
        // sender
        log.debug("Extract URIs from message {}", uriMsg);
        Set<CrawlUriMessage> newCrawlMessages = sparqlService.extractCrawlUriMessages(uriMsg.getBaseUri(), wonNodeUri);
        for (CrawlUriMessage newMsg : newCrawlMessages) {
            getSender().tell(newMsg, getSelf());
        }
        // signal sender that this URI is processed and save meta data about crawling
        // the URI.
        // This needs to be done after all extracted URI messages have been sent to
        // guarantee consistency
        // in case of failure
        sendDoneUriMessage(uriMsg, wonNodeUri, etags);
        // if the crawled dataset is an atom, publish an atom event carrying its
        // current state
        if (AtomModelWrapper.isAAtom(ds)) {
            AtomModelWrapper atomModelWrapper = new AtomModelWrapper(ds, false);
            AtomState state = atomModelWrapper.getAtomState();
            AtomEvent.TYPE type = state.equals(AtomState.ACTIVE) ? AtomEvent.TYPE.ACTIVE : AtomEvent.TYPE.INACTIVE;
            log.debug("Created atom event for atom uri {}", uriMsg.getUri());
            long crawlDate = System.currentTimeMillis();
            AtomEvent atomEvent = new AtomEvent(uriMsg.getUri(), wonNodeUri, type, crawlDate, ds, Cause.CRAWLED);
            pubSubMediator.tell(new DistributedPubSubMediator.Publish(atomEvent.getClass().getName(), atomEvent), getSelf());
        }
    } catch (RestClientException e1) {
        // usually happens if the fetch of the dataset fails e.g.
        // HttpServerErrorException, HttpClientErrorException
        log.debug("Exception during crawling: " + e1);
        throw new CrawlWrapperException(e1, uriMsg);
    } catch (LinkedDataFetchingException e) {
        log.debug("Exception during crawling: " + e);
        Throwable cause = e.getCause();
        if (cause instanceof HttpClientErrorException && Objects.equals(((HttpClientErrorException) cause).getStatusCode(), HttpStatus.GONE)) {
            // 410: the resource has been removed on the remote side
            log.debug("Uri used to exist, but has been deleted, deleting from rdf store.");
            sendDeletedAtomMessage(uriMsg.getUri(), uriMsg.getWonNodeUri());
            sendDeletedUriMessage(uriMsg, uriMsg.getWonNodeUri(), etags);
        } else if (cause instanceof HttpClientErrorException && Objects.equals(((HttpClientErrorException) cause).getStatusCode(), HttpStatus.FORBIDDEN)) {
            // 403: we may not read the resource, so the URI is reported as done
            log.debug("Not allowed to access uri, marking as done");
            sendDoneUriMessage(uriMsg, uriMsg.getWonNodeUri(), etags);
        } else {
            throw new CrawlWrapperException(e, uriMsg);
        }
    } catch (Exception e) {
        log.debug("Exception during crawling: " + e);
        throw new CrawlWrapperException(e, uriMsg);
    } finally {
        if (lock != null) {
            lock.leaveCriticalSection();
        }
    }
}
use of won.protocol.util.AtomModelWrapper in project webofneeds by researchstudio-sat.
the class GoalInstantiationTest method example5_singleGoalsValidity.
/**
 * Checks that the first goal of each of the two example-5 atoms can be
 * instantiated on its own and that each instantiation is conform.
 *
 * @throws IOException declared by the test; presumably raised when loading a
 * test dataset fails
 */
@Test
public void example5_singleGoalsValidity() throws IOException {
    // check that the goals from each atom can be validated successfully without
    // each other
    Dataset atom1 = loadDataset(baseFolder + "ex5_atom.trig");
    Dataset atom2 = loadDataset(baseFolder + "ex5_atom_debug.trig");
    Dataset conversation = loadDataset(baseFolder + "ex5_conversation.trig");
    GoalInstantiationProducer goalInstantiation = new GoalInstantiationProducer(atom1, atom2, conversation, "http://example.org/", "http://example.org/blended/");
    // The result of this call is not inspected by this test; the call itself is
    // kept in case it has side effects - TODO confirm and remove if it has none.
    goalInstantiation.createAllGoalCombinationInstantiationResults();
    // goal of atom1
    AtomModelWrapper atomWrapper1 = new AtomModelWrapper(atom1);
    Resource goal = atomWrapper1.getGoals().iterator().next();
    GoalInstantiationResult result = goalInstantiation.findInstantiationForGoal(goal);
    Assert.assertTrue(result.isConform());
    // goal of atom2 (must be read from the second wrapper, not the first one again)
    AtomModelWrapper atomWrapper2 = new AtomModelWrapper(atom2);
    goal = atomWrapper2.getGoals().iterator().next();
    result = goalInstantiation.findInstantiationForGoal(goal);
    Assert.assertTrue(result.isConform());
}
use of won.protocol.util.AtomModelWrapper in project webofneeds by researchstudio-sat.
the class GoalInstantiationTest method exampleTaxi_validity.
/**
 * Instantiates the goals of a taxi offer against three taxi demands.
 * <ul>
 * <li>Regular demand: every conform result must yield the expected departure and
 * destination coordinates, the first goal of the offer must be conform, and
 * re-checking the produced instance model against the offer must be conform
 * too.</li>
 * <li>Demand from the "noloc" file and demand from the "twoloc" file: no result
 * may be conform.</li>
 * </ul>
 *
 * @throws IOException declared by the test; presumably raised when loading a
 * test resource fails
 */
@Test
public void exampleTaxi_validity() throws IOException {
    final String baseUri = "http://example.org/";
    final String blendedUri = "http://example.org/blended/";
    Dataset offer = loadDataset(baseFolder + "ex6_taxioffer.trig");
    Dataset demand = loadDataset(baseFolder + "ex6_taxi.trig");
    Dataset demandNoLocation = loadDataset(baseFolder + "ex6_taxi_noloc.trig");
    Dataset demandTwoLocations = loadDataset(baseFolder + "ex6_taxi_twoloc.trig");

    // --- offer vs. regular demand ---
    GoalInstantiationProducer producer = new GoalInstantiationProducer(offer, demand, null, baseUri, blendedUri);
    Collection<GoalInstantiationResult> instantiations = producer.createGoalInstantiationResultsForAtom1();
    for (GoalInstantiationResult candidate : instantiations) {
        if (!candidate.isConform()) {
            continue;
        }
        Coordinate from = getAddress(
                        loadSparqlQuery("/won/utils/goals/extraction/address/fromLocationQuery.rq"),
                        candidate.getInstanceModel());
        Coordinate to = getAddress(
                        loadSparqlQuery("/won/utils/goals/extraction/address/toLocationQuery.rq"),
                        candidate.getInstanceModel());
        Assert.assertEquals(new Coordinate(10.0f, 11.0f), from);
        Assert.assertEquals(new Coordinate(12.0f, 13.0f), to);
    }
    AtomModelWrapper offerWrapper = new AtomModelWrapper(offer);
    Resource firstGoal = offerWrapper.getGoals().iterator().next();
    GoalInstantiationResult goalResult = producer.findInstantiationForGoal(firstGoal);
    Assert.assertTrue(goalResult.isConform());
    // validate the produced instance model once more against the offer dataset
    GoalInstantiationResult recheck = GoalInstantiationProducer.findInstantiationForGoalInDataset(offer, firstGoal,
                    goalResult.getInstanceModel());
    Assert.assertTrue(recheck.isConform());

    // --- offer vs. demand loaded from the "noloc" file ---
    producer = new GoalInstantiationProducer(offer, demandNoLocation, null, baseUri, blendedUri);
    instantiations = producer.createGoalInstantiationResultsForAtom1();
    for (GoalInstantiationResult candidate : instantiations) {
        Assert.assertFalse(candidate.isConform());
    }

    // --- offer vs. demand loaded from the "twoloc" file ---
    producer = new GoalInstantiationProducer(offer, demandTwoLocations, null, baseUri, blendedUri);
    instantiations = producer.createGoalInstantiationResultsForAtom1();
    for (GoalInstantiationResult candidate : instantiations) {
        Assert.assertFalse(candidate.isConform());
    }
}
use of won.protocol.util.AtomModelWrapper in project webofneeds by researchstudio-sat.
the class GoalInstantiationTest method example1_allInfoInTwoGoals.
@Test
public void example1_allInfoInTwoGoals() throws IOException {
Dataset atom1 = loadDataset(baseFolder + "ex1_atom.trig");
Dataset atom2 = loadDataset(baseFolder + "ex1_atom_debug.trig");
Dataset conversation = loadDataset(baseFolder + "ex1_conversation.trig");
GoalInstantiationProducer goalInstantiation = new GoalInstantiationProducer(atom1, atom2, conversation, "http://example.org/", "http://example.org/blended/");
Collection<GoalInstantiationResult> results = goalInstantiation.createAllGoalCombinationInstantiationResults();
// instantiation of combined goals must be conform
Assert.assertEquals(1, results.size());
Assert.assertTrue(results.iterator().next().isConform());
// instantiation of goal of atom1 fails cause driver is missing
AtomModelWrapper atomWrapper1 = new AtomModelWrapper(atom1);
Resource goal = atomWrapper1.getGoals().iterator().next();
GoalInstantiationResult result = goalInstantiation.findInstantiationForGoal(goal);
Assert.assertFalse(result.isConform());
Assert.assertEquals("hasDriver", result.getShaclReportWrapper().getValidationResults().iterator().next().getResultPath().getLocalName());
// instantiation of goal of atom2 fails cause 3 attributes are missing:
// location, time, client
AtomModelWrapper atomWrapper2 = new AtomModelWrapper(atom2);
goal = atomWrapper2.getGoals().iterator().next();
result = goalInstantiation.findInstantiationForGoal(goal);
Assert.assertFalse(result.isConform());
Assert.assertEquals(3, result.getShaclReportWrapper().getValidationResults().size());
}
use of won.protocol.util.AtomModelWrapper in project webofneeds by researchstudio-sat.
the class GoalInstantiationTest method exampleCorrectTaxi_validity.
@Test
public void exampleCorrectTaxi_validity() throws IOException {
Dataset taxiOffer = loadDataset(baseFolder + "exCorrect_taxioffer.trig");
Dataset taxiDemand = loadDataset(baseFolder + "exCorrect_taxi.trig");
GoalInstantiationProducer goalInstantiation = new GoalInstantiationProducer(taxiOffer, taxiDemand, null, "http://example.org/", "http://example.org/blended/");
Collection<GoalInstantiationResult> results = goalInstantiation.createGoalInstantiationResultsForAtom1();
for (GoalInstantiationResult res : results) {
if (res.isConform()) {
Coordinate departureAddress = getAddress(loadSparqlQuery("/won/utils/goals/extraction/address/fromLocationQuery.rq"), res.getInstanceModel());
String departureName = getName(loadSparqlQuery("/won/utils/goals/extraction/address/fromLocationQuery.rq"), res.getInstanceModel());
Coordinate destinationAddress = getAddress(loadSparqlQuery("/won/utils/goals/extraction/address/toLocationQuery.rq"), res.getInstanceModel());
String destinationName = getName(loadSparqlQuery("/won/utils/goals/extraction/address/toLocationQuery.rq"), res.getInstanceModel());
// Assert.assertEquals(departureAddress, new Coordinate(10.0f, 11.0f));
// Assert.assertEquals(destinationAddress, new Coordinate(12.0f, 13.0f));
}
}
AtomModelWrapper atomWrapper1 = new AtomModelWrapper(taxiOffer);
Resource goal = atomWrapper1.getGoals().iterator().next();
GoalInstantiationResult result = goalInstantiation.findInstantiationForGoal(goal);
Assert.assertTrue(result.isConform());
GoalInstantiationResult recheckResultModel = GoalInstantiationProducer.findInstantiationForGoalInDataset(taxiOffer, goal, result.getInstanceModel());
Assert.assertTrue(recheckResultModel.isConform());
}
Aggregations