Search in sources :

Example 6 with AtomModelWrapper

use of won.protocol.util.AtomModelWrapper in project webofneeds by researchstudio-sat.

the class WorkerCrawlerActor method crawlUri.

/**
 * Processes one crawl URI message: obtains the dataset for the URI (either from
 * the serialized resource carried by the message or by fetching it through
 * {@code linkedDataSource}), stores it via {@code sparqlService}, sends newly
 * extracted crawl URI messages back to the sender, reports the URI as done and,
 * if the dataset is an atom, publishes an {@link AtomEvent}.
 *
 * @param uriMsg the crawl message describing the URI to process
 * @throws CrawlWrapperException if fetching or processing fails; the original
 * exception is kept as the cause. HTTP 410 (GONE) and 403 (FORBIDDEN) reported
 * through a {@link LinkedDataFetchingException} are handled here and are not
 * rethrown.
 */
private void crawlUri(CrawlUriMessage uriMsg) {
    Dataset ds = null;
    List<String> etags = null;
    // only set once the critical section has actually been entered
    Lock lock = null;
    try {
        // check if resource is already downloaded
        if (uriMsg instanceof ResourceCrawlUriMessage) {
            ResourceCrawlUriMessage resMsg = ((ResourceCrawlUriMessage) uriMsg);
            if (resMsg.getSerializedResource() != null && resMsg.getSerializationFormat() != null) {
                // TODO: this should be optimized, why deserialize the resource here when we
                // just want to save it in the RDF
                // store? How to insert this serialized resource into the SPARQL endpoint?
                ds = SparqlService.deserializeDataset(resMsg.getSerializedResource(), resMsg.getSerializationFormat());
            }
        }
        // download resource if not already downloaded
        if (ds == null) {
            // use ETag/If-None-Match Headers to make the process more efficient
            HttpHeaders httpHeaders = new HttpHeaders();
            if (uriMsg.getResourceETagHeaderValues() != null && !uriMsg.getResourceETagHeaderValues().isEmpty()) {
                String ifNoneMatchHeaderValue = StringUtils.collectionToDelimitedString(uriMsg.getResourceETagHeaderValues(), ", ");
                httpHeaders.add("If-None-Match", ifNoneMatchHeaderValue);
            }
            DatasetResponseWithStatusCodeAndHeaders datasetWithHeaders = linkedDataSource.getDatasetWithHeadersForResource(URI.create(uriMsg.getUri()), httpHeaders);
            ds = datasetWithHeaders.getDataset();
            etags = datasetWithHeaders.getResponseHeaders().get("ETag");
            // if dataset was not modified (304) we can treat the current crawl uri as done
            if (ds == null && datasetWithHeaders.getStatusCode() == 304) {
                sendDoneUriMessage(uriMsg, uriMsg.getWonNodeUri(), etags);
                return;
            }
            // if there is paging activated and the won node tells us that there is more
            // data (previous link)
            // to be downloaded, then we add this link to the crawling process too
            String prevLink = linkedDataSource.getPreviousLinkFromDatasetWithHeaders(datasetWithHeaders);
            if (prevLink != null) {
                CrawlUriMessage newUriMsg = new CrawlUriMessage(uriMsg.getBaseUri(), prevLink, uriMsg.getWonNodeUri(), CrawlUriMessage.STATUS.PROCESS, System.currentTimeMillis(), null);
                getSender().tell(newUriMsg, getSelf());
            }
        }
        // A response without a dataset that is not a 304 cannot be processed. Fail
        // with a descriptive error instead of the NullPointerException that a null
        // lock used to cause; the generic catch below still wraps it in a
        // CrawlWrapperException, so callers see the same exception type as before.
        if (ds == null) {
            throw new IllegalStateException("No dataset available for crawl uri " + uriMsg.getUri());
        }
        Lock datasetLock = ds.getLock();
        datasetLock.enterCriticalSection(true);
        // remember the lock only after entering, so the finally block never leaves a
        // critical section that was not entered
        lock = datasetLock;
        // Save dataset to triple store
        sparqlService.updateNamedGraphsOfDataset(ds);
        String wonNodeUri = extractWonNodeUri(ds, uriMsg.getUri());
        if (wonNodeUri == null) {
            wonNodeUri = uriMsg.getWonNodeUri();
        }
        // do nothing more here if the STATUS of the message was SAVE
        if (uriMsg.getStatus().equals(CrawlUriMessage.STATUS.SAVE)) {
            log.debug("processed crawl uri event {} with status 'SAVE'", uriMsg);
            return;
        }
        // extract URIs from current resource and send extracted URI messages back to
        // sender
        log.debug("Extract URIs from message {}", uriMsg);
        Set<CrawlUriMessage> newCrawlMessages = sparqlService.extractCrawlUriMessages(uriMsg.getBaseUri(), wonNodeUri);
        for (CrawlUriMessage newMsg : newCrawlMessages) {
            getSender().tell(newMsg, getSelf());
        }
        // signal sender that this URI is processed and save meta data about crawling
        // the URI.
        // This needs to be done after all extracted URI messages have been sent to
        // guarantee consistency
        // in case of failure
        sendDoneUriMessage(uriMsg, wonNodeUri, etags);
        // if the crawled dataset is an atom, publish an atom event for it
        if (AtomModelWrapper.isAAtom(ds)) {
            AtomModelWrapper atomModelWrapper = new AtomModelWrapper(ds, false);
            AtomState state = atomModelWrapper.getAtomState();
            AtomEvent.TYPE type = state.equals(AtomState.ACTIVE) ? AtomEvent.TYPE.ACTIVE : AtomEvent.TYPE.INACTIVE;
            log.debug("Created atom event for atom uri {}", uriMsg.getUri());
            long crawlDate = System.currentTimeMillis();
            AtomEvent atomEvent = new AtomEvent(uriMsg.getUri(), wonNodeUri, type, crawlDate, ds, Cause.CRAWLED);
            pubSubMediator.tell(new DistributedPubSubMediator.Publish(atomEvent.getClass().getName(), atomEvent), getSelf());
        }
    } catch (RestClientException e1) {
        // usually happens if the fetch of the dataset fails e.g.
        // HttpServerErrorException, HttpClientErrorException
        log.debug("Exception during crawling: " + e1);
        throw new CrawlWrapperException(e1, uriMsg);
    } catch (LinkedDataFetchingException e) {
        log.debug("Exception during crawling: " + e);
        Throwable cause = e.getCause();
        if (cause instanceof HttpClientErrorException && Objects.equals(((HttpClientErrorException) cause).getStatusCode(), HttpStatus.GONE)) {
            log.debug("Uri used to exist, but has been deleted, deleting from rdf store.");
            sendDeletedAtomMessage(uriMsg.getUri(), uriMsg.getWonNodeUri());
            sendDeletedUriMessage(uriMsg, uriMsg.getWonNodeUri(), etags);
        } else if (cause instanceof HttpClientErrorException && Objects.equals(((HttpClientErrorException) cause).getStatusCode(), HttpStatus.FORBIDDEN)) {
            log.debug("Not allowed to access uri, marking as done");
            sendDoneUriMessage(uriMsg, uriMsg.getWonNodeUri(), etags);
        } else {
            throw new CrawlWrapperException(e, uriMsg);
        }
    } catch (Exception e) {
        log.debug("Exception during crawling: " + e);
        throw new CrawlWrapperException(e, uriMsg);
    } finally {
        if (lock != null) {
            lock.leaveCriticalSection();
        }
    }
}
Also used : HttpHeaders(org.springframework.http.HttpHeaders) CrawlUriMessage(won.matcher.service.crawler.msg.CrawlUriMessage) ResourceCrawlUriMessage(won.matcher.service.crawler.msg.ResourceCrawlUriMessage) DatasetResponseWithStatusCodeAndHeaders(won.protocol.rest.DatasetResponseWithStatusCodeAndHeaders) HttpClientErrorException(org.springframework.web.client.HttpClientErrorException) AtomState(won.protocol.model.AtomState) DistributedPubSubMediator(akka.cluster.pubsub.DistributedPubSubMediator) Dataset(org.apache.jena.query.Dataset) CrawlWrapperException(won.matcher.service.crawler.exception.CrawlWrapperException) CrawlWrapperException(won.matcher.service.crawler.exception.CrawlWrapperException) IncorrectPropertyCountException(won.protocol.exception.IncorrectPropertyCountException) RestClientException(org.springframework.web.client.RestClientException) LinkedDataFetchingException(won.protocol.rest.LinkedDataFetchingException) HttpClientErrorException(org.springframework.web.client.HttpClientErrorException) Lock(org.apache.jena.shared.Lock) ResourceCrawlUriMessage(won.matcher.service.crawler.msg.ResourceCrawlUriMessage) AtomEvent(won.matcher.service.common.event.AtomEvent) LinkedDataFetchingException(won.protocol.rest.LinkedDataFetchingException) RestClientException(org.springframework.web.client.RestClientException) AtomModelWrapper(won.protocol.util.AtomModelWrapper)

Example 7 with AtomModelWrapper

use of won.protocol.util.AtomModelWrapper in project webofneeds by researchstudio-sat.

the class GoalInstantiationTest method example5_singleGoalsValidity.

/**
 * Checks that the first goal of each of the two example-5 atoms can be
 * instantiated into a conforming result on its own.
 *
 * @throws IOException declared because the dataset loading helper may fail
 */
@Test
public void example5_singleGoalsValidity() throws IOException {
    // check that the goals from each atom can be validated successfully without
    // each other
    Dataset atom1 = loadDataset(baseFolder + "ex5_atom.trig");
    Dataset atom2 = loadDataset(baseFolder + "ex5_atom_debug.trig");
    Dataset conversation = loadDataset(baseFolder + "ex5_conversation.trig");
    GoalInstantiationProducer goalInstantiation = new GoalInstantiationProducer(atom1, atom2, conversation, "http://example.org/", "http://example.org/blended/");
    // NOTE(review): the returned collection is not inspected; the call is kept in
    // case it has side effects on the producer - confirm
    goalInstantiation.createAllGoalCombinationInstantiationResults();
    AtomModelWrapper atomWrapper1 = new AtomModelWrapper(atom1);
    Resource goal = atomWrapper1.getGoals().iterator().next();
    GoalInstantiationResult result = goalInstantiation.findInstantiationForGoal(goal);
    Assert.assertTrue(result.isConform());
    // take the goal from the second atom; reading it from atomWrapper1 again would
    // only re-validate the first atom's goal
    AtomModelWrapper atomWrapper2 = new AtomModelWrapper(atom2);
    goal = atomWrapper2.getGoals().iterator().next();
    result = goalInstantiation.findInstantiationForGoal(goal);
    Assert.assertTrue(result.isConform());
}
Also used : GoalInstantiationResult(won.utils.goals.GoalInstantiationResult) Dataset(org.apache.jena.query.Dataset) GoalInstantiationProducer(won.utils.goals.GoalInstantiationProducer) Resource(org.apache.jena.rdf.model.Resource) AtomModelWrapper(won.protocol.util.AtomModelWrapper) Test(org.junit.Test)

Example 8 with AtomModelWrapper

use of won.protocol.util.AtomModelWrapper in project webofneeds by researchstudio-sat.

the class GoalInstantiationTest method exampleTaxi_validity.

/**
 * Taxi example: every conforming instantiation of the offer/demand pair must
 * yield the expected departure and destination coordinates, the offer's first
 * goal must instantiate (and re-check) as conforming, and demands with no
 * location or with two locations must not produce any conforming result.
 *
 * @throws IOException declared because the loading helpers may fail
 */
@Test
public void exampleTaxi_validity() throws IOException {
    final String baseUri = "http://example.org/";
    final String blendedUri = "http://example.org/blended/";
    Dataset offer = loadDataset(baseFolder + "ex6_taxioffer.trig");
    Dataset demand = loadDataset(baseFolder + "ex6_taxi.trig");
    Dataset demandWithoutLocation = loadDataset(baseFolder + "ex6_taxi_noloc.trig");
    Dataset demandWithTwoLocations = loadDataset(baseFolder + "ex6_taxi_twoloc.trig");

    // offer + regular demand: conforming results carry the expected coordinates
    GoalInstantiationProducer producer = new GoalInstantiationProducer(offer, demand, null, baseUri, blendedUri);
    for (GoalInstantiationResult candidate : producer.createGoalInstantiationResultsForAtom1()) {
        if (!candidate.isConform()) {
            continue;
        }
        Coordinate from = getAddress(loadSparqlQuery("/won/utils/goals/extraction/address/fromLocationQuery.rq"), candidate.getInstanceModel());
        Coordinate to = getAddress(loadSparqlQuery("/won/utils/goals/extraction/address/toLocationQuery.rq"), candidate.getInstanceModel());
        Assert.assertEquals(new Coordinate(10.0f, 11.0f), from);
        Assert.assertEquals(new Coordinate(12.0f, 13.0f), to);
    }

    // the first goal of the offer instantiates and survives a re-check
    Resource offerGoal = new AtomModelWrapper(offer).getGoals().iterator().next();
    GoalInstantiationResult offerOutcome = producer.findInstantiationForGoal(offerGoal);
    Assert.assertTrue(offerOutcome.isConform());
    GoalInstantiationResult recheck = GoalInstantiationProducer.findInstantiationForGoalInDataset(offer, offerGoal, offerOutcome.getInstanceModel());
    Assert.assertTrue(recheck.isConform());

    // demand without a location: nothing may conform
    GoalInstantiationProducer noLocationProducer = new GoalInstantiationProducer(offer, demandWithoutLocation, null, baseUri, blendedUri);
    noLocationProducer.createGoalInstantiationResultsForAtom1()
                    .forEach(outcome -> Assert.assertFalse(outcome.isConform()));

    // demand with two locations: nothing may conform
    GoalInstantiationProducer twoLocationsProducer = new GoalInstantiationProducer(offer, demandWithTwoLocations, null, baseUri, blendedUri);
    twoLocationsProducer.createGoalInstantiationResultsForAtom1()
                    .forEach(outcome -> Assert.assertFalse(outcome.isConform()));
}
Also used : GoalInstantiationResult(won.utils.goals.GoalInstantiationResult) Coordinate(won.protocol.model.Coordinate) Dataset(org.apache.jena.query.Dataset) GoalInstantiationProducer(won.utils.goals.GoalInstantiationProducer) Resource(org.apache.jena.rdf.model.Resource) AtomModelWrapper(won.protocol.util.AtomModelWrapper) Test(org.junit.Test)

Example 9 with AtomModelWrapper

use of won.protocol.util.AtomModelWrapper in project webofneeds by researchstudio-sat.

the class GoalInstantiationTest method example1_allInfoInTwoGoals.

/**
 * Example 1: the combination of both atoms' goals yields exactly one conforming
 * instantiation, while each atom's first goal on its own does not conform.
 *
 * @throws IOException declared because the dataset loading helper may fail
 */
@Test
public void example1_allInfoInTwoGoals() throws IOException {
    Dataset firstAtom = loadDataset(baseFolder + "ex1_atom.trig");
    Dataset secondAtom = loadDataset(baseFolder + "ex1_atom_debug.trig");
    Dataset conversationData = loadDataset(baseFolder + "ex1_conversation.trig");
    GoalInstantiationProducer producer = new GoalInstantiationProducer(firstAtom, secondAtom, conversationData, "http://example.org/", "http://example.org/blended/");

    // the combined goals must produce a single, conforming instantiation
    Collection<GoalInstantiationResult> combined = producer.createAllGoalCombinationInstantiationResults();
    Assert.assertEquals(1, combined.size());
    GoalInstantiationResult onlyCombination = combined.iterator().next();
    Assert.assertTrue(onlyCombination.isConform());

    // the goal of the first atom alone is rejected: the driver is missing
    Resource firstAtomGoal = new AtomModelWrapper(firstAtom).getGoals().iterator().next();
    GoalInstantiationResult firstOutcome = producer.findInstantiationForGoal(firstAtomGoal);
    Assert.assertFalse(firstOutcome.isConform());
    String violatedPath = firstOutcome.getShaclReportWrapper().getValidationResults().iterator().next().getResultPath().getLocalName();
    Assert.assertEquals("hasDriver", violatedPath);

    // the goal of the second atom alone is rejected with three missing
    // attributes: location, time, client
    Resource secondAtomGoal = new AtomModelWrapper(secondAtom).getGoals().iterator().next();
    GoalInstantiationResult secondOutcome = producer.findInstantiationForGoal(secondAtomGoal);
    Assert.assertFalse(secondOutcome.isConform());
    Assert.assertEquals(3, secondOutcome.getShaclReportWrapper().getValidationResults().size());
}
Also used : GoalInstantiationResult(won.utils.goals.GoalInstantiationResult) Dataset(org.apache.jena.query.Dataset) GoalInstantiationProducer(won.utils.goals.GoalInstantiationProducer) Resource(org.apache.jena.rdf.model.Resource) AtomModelWrapper(won.protocol.util.AtomModelWrapper) Test(org.junit.Test)

Example 10 with AtomModelWrapper

use of won.protocol.util.AtomModelWrapper in project webofneeds by researchstudio-sat.

the class GoalInstantiationTest method exampleCorrectTaxi_validity.

/**
 * "Correct taxi" example: runs the address and name extraction queries on every
 * conforming instantiation, then verifies that the offer's first goal
 * instantiates as conforming and still conforms when re-checked against the
 * offer dataset.
 *
 * @throws IOException declared because the loading helpers may fail
 */
@Test
public void exampleCorrectTaxi_validity() throws IOException {
    Dataset offer = loadDataset(baseFolder + "exCorrect_taxioffer.trig");
    Dataset demand = loadDataset(baseFolder + "exCorrect_taxi.trig");
    GoalInstantiationProducer producer = new GoalInstantiationProducer(offer, demand, null, "http://example.org/", "http://example.org/blended/");
    for (GoalInstantiationResult candidate : producer.createGoalInstantiationResultsForAtom1()) {
        if (!candidate.isConform()) {
            continue;
        }
        // The extracted values are currently only computed, not compared: the
        // coordinate assertions below are disabled.
        Coordinate fromCoordinate = getAddress(loadSparqlQuery("/won/utils/goals/extraction/address/fromLocationQuery.rq"), candidate.getInstanceModel());
        String fromName = getName(loadSparqlQuery("/won/utils/goals/extraction/address/fromLocationQuery.rq"), candidate.getInstanceModel());
        Coordinate toCoordinate = getAddress(loadSparqlQuery("/won/utils/goals/extraction/address/toLocationQuery.rq"), candidate.getInstanceModel());
        String toName = getName(loadSparqlQuery("/won/utils/goals/extraction/address/toLocationQuery.rq"), candidate.getInstanceModel());
        // Assert.assertEquals(fromCoordinate, new Coordinate(10.0f, 11.0f));
        // Assert.assertEquals(toCoordinate, new Coordinate(12.0f, 13.0f));
    }
    // the first goal of the offer instantiates and survives a re-check
    Resource offerGoal = new AtomModelWrapper(offer).getGoals().iterator().next();
    GoalInstantiationResult offerOutcome = producer.findInstantiationForGoal(offerGoal);
    Assert.assertTrue(offerOutcome.isConform());
    GoalInstantiationResult recheck = GoalInstantiationProducer.findInstantiationForGoalInDataset(offer, offerGoal, offerOutcome.getInstanceModel());
    Assert.assertTrue(recheck.isConform());
}
Also used : GoalInstantiationResult(won.utils.goals.GoalInstantiationResult) Coordinate(won.protocol.model.Coordinate) Dataset(org.apache.jena.query.Dataset) GoalInstantiationProducer(won.utils.goals.GoalInstantiationProducer) Resource(org.apache.jena.rdf.model.Resource) AtomModelWrapper(won.protocol.util.AtomModelWrapper) Test(org.junit.Test)

Aggregations

AtomModelWrapper (won.protocol.util.AtomModelWrapper)37 Dataset (org.apache.jena.query.Dataset)19 Resource (org.apache.jena.rdf.model.Resource)15 URI (java.net.URI)9 Model (org.apache.jena.rdf.model.Model)8 WonMessage (won.protocol.message.WonMessage)6 RdfUtils (won.protocol.util.RdfUtils)6 IOException (java.io.IOException)5 MethodHandles (java.lang.invoke.MethodHandles)5 LinkedList (java.util.LinkedList)5 Test (org.junit.Test)5 Logger (org.slf4j.Logger)5 LoggerFactory (org.slf4j.LoggerFactory)5 GoalInstantiationProducer (won.utils.goals.GoalInstantiationProducer)5 GoalInstantiationResult (won.utils.goals.GoalInstantiationResult)5 DistributedPubSubMediator (akka.cluster.pubsub.DistributedPubSubMediator)4 java.util (java.util)4 Collectors (java.util.stream.Collectors)4 StringUtils (org.apache.commons.lang3.StringUtils)4 ActorRef (akka.actor.ActorRef)3