Search in sources :

Example 1 with NeedState

use of won.protocol.model.NeedState in project webofneeds by researchstudio-sat.

From the class WorkerCrawlerActor, method crawlUri:

/**
 * Crawls a single URI: obtains its RDF dataset (either from a serialized payload
 * shipped inside the message or by downloading it from the linked data source),
 * saves it to the SPARQL triple store, extracts further URIs to crawl, and signals
 * the sender when done. If the dataset represents a need, a {@link NeedEvent} is
 * published on the distributed event bus.
 *
 * @param uriMsg the crawl instruction; its STATUS controls whether URI extraction happens
 * @throws CrawlWrapperException if fetching or processing the resource fails
 */
private void crawlUri(CrawlUriMessage uriMsg) {
    Dataset ds = null;
    List<String> etags = null;
    Lock lock = null;
    try {
        // check if resource is already downloaded (serialized payload shipped with the message)
        if (uriMsg instanceof ResourceCrawlUriMessage) {
            ResourceCrawlUriMessage resMsg = ((ResourceCrawlUriMessage) uriMsg);
            if (resMsg.getSerializedResource() != null && resMsg.getSerializationFormat() != null) {
                // TODO: this should be optimized, why deserialize the resource here when we just want to save it in the RDF
                // store? How to insert this serialized resource into the SPARQL endpoint?
                ds = SparqlService.deserializeDataset(resMsg.getSerializedResource(), resMsg.getSerializationFormat());
            }
        }
        // download resource if not already downloaded
        if (ds == null) {
            // use ETag/If-None-Match headers so unchanged resources are not re-downloaded
            HttpHeaders httpHeaders = new HttpHeaders();
            if (uriMsg.getResourceETagHeaderValues() != null && !uriMsg.getResourceETagHeaderValues().isEmpty()) {
                String ifNoneMatchHeaderValue = StringUtils.collectionToDelimitedString(uriMsg.getResourceETagHeaderValues(), ", ");
                httpHeaders.add("If-None-Match", ifNoneMatchHeaderValue);
            }
            DatasetResponseWithStatusCodeAndHeaders datasetWithHeaders = linkedDataSource.getDatasetWithHeadersForResource(URI.create(uriMsg.getUri()), httpHeaders);
            ds = datasetWithHeaders.getDataset();
            etags = datasetWithHeaders.getResponseHeaders().get("ETag");
            // if dataset was not modified (304) we can treat the current crawl uri as done
            if (ds == null && datasetWithHeaders.getStatusCode() == 304) {
                sendDoneUriMessage(uriMsg, uriMsg.getWonNodeUri(), etags);
                return;
            }
            // if there is paging activated and the won node tells us that there is more data (previous link)
            // to be downloaded, then we add this link to the crawling process too
            String prevLink = linkedDataSource.getPreviousLinkFromDatasetWithHeaders(datasetWithHeaders);
            if (prevLink != null) {
                CrawlUriMessage newUriMsg = new CrawlUriMessage(uriMsg.getBaseUri(), prevLink, uriMsg.getWonNodeUri(), CrawlUriMessage.STATUS.PROCESS, System.currentTimeMillis(), null);
                getSender().tell(newUriMsg, getSelf());
            }
        }
        // FIX: the previous code ("lock = ds == null ? null : ds.getLock(); lock.enterCriticalSection(...)")
        // threw a bare NullPointerException when no dataset could be obtained. Fail with a descriptive
        // exception instead; it is wrapped into a CrawlWrapperException by the catch clause below,
        // so the method's error contract towards callers is unchanged.
        if (ds == null) {
            throw new IllegalStateException("no dataset could be obtained for URI " + uriMsg.getUri());
        }
        lock = ds.getLock();
        lock.enterCriticalSection(true);
        // Save dataset to triple store
        sparqlService.updateNamedGraphsOfDataset(ds);
        String wonNodeUri = extractWonNodeUri(ds, uriMsg.getUri());
        if (wonNodeUri == null) {
            wonNodeUri = uriMsg.getWonNodeUri();
        }
        // do nothing more here if the STATUS of the message was SAVE
        if (uriMsg.getStatus().equals(CrawlUriMessage.STATUS.SAVE)) {
            log.debug("processed crawl uri event {} with status 'SAVE'", uriMsg);
            return;
        }
        // extract URIs from current resource and send extracted URI messages back to sender
        log.debug("Extract URIs from message {}", uriMsg);
        Set<CrawlUriMessage> newCrawlMessages = sparqlService.extractCrawlUriMessages(uriMsg.getBaseUri(), wonNodeUri);
        for (CrawlUriMessage newMsg : newCrawlMessages) {
            getSender().tell(newMsg, getSelf());
        }
        // signal sender that this URI is processed and save meta data about crawling the URI.
        // This needs to be done after all extracted URI messages have been sent to guarantee consistency
        // in case of failure
        sendDoneUriMessage(uriMsg, wonNodeUri, etags);
        // if this URI/dataset was a need then send an event to the distributed event bus
        if (NeedModelWrapper.isANeed(ds)) {
            NeedModelWrapper needModelWrapper = new NeedModelWrapper(ds, false);
            NeedState state = needModelWrapper.getNeedState();
            // enum constants are singletons, so identity comparison is the idiomatic (and null-safe for the constant) check
            NeedEvent.TYPE type = (state == NeedState.ACTIVE) ? NeedEvent.TYPE.ACTIVE : NeedEvent.TYPE.INACTIVE;
            log.debug("Created need event for need uri {}", uriMsg.getUri());
            long crawlDate = System.currentTimeMillis();
            NeedEvent needEvent = new NeedEvent(uriMsg.getUri(), wonNodeUri, type, crawlDate, ds);
            pubSubMediator.tell(new DistributedPubSubMediator.Publish(needEvent.getClass().getName(), needEvent), getSelf());
        }
    } catch (RestClientException e1) {
        // usually happens if the fetch of the dataset fails e.g. HttpServerErrorException, HttpClientErrorException
        // parameterized logging instead of string concatenation (avoids eager toString when debug is off)
        log.debug("Exception during crawling: {}", e1);
        throw new CrawlWrapperException(e1, uriMsg);
    } catch (Exception e) {
        log.debug("Exception during crawling: {}", e);
        throw new CrawlWrapperException(e, uriMsg);
    } finally {
        // always release the dataset lock if we acquired it
        if (lock != null) {
            lock.leaveCriticalSection();
        }
    }
}
Also used : HttpHeaders(org.springframework.http.HttpHeaders) CrawlUriMessage(won.matcher.service.crawler.msg.CrawlUriMessage) ResourceCrawlUriMessage(won.matcher.service.crawler.msg.ResourceCrawlUriMessage) DatasetResponseWithStatusCodeAndHeaders(won.protocol.rest.DatasetResponseWithStatusCodeAndHeaders) DistributedPubSubMediator(akka.cluster.pubsub.DistributedPubSubMediator) Dataset(org.apache.jena.query.Dataset) NeedState(won.protocol.model.NeedState) NeedModelWrapper(won.protocol.util.NeedModelWrapper) NeedEvent(won.matcher.service.common.event.NeedEvent) CrawlWrapperException(won.matcher.service.crawler.exception.CrawlWrapperException) CrawlWrapperException(won.matcher.service.crawler.exception.CrawlWrapperException) IncorrectPropertyCountException(won.protocol.exception.IncorrectPropertyCountException) RestClientException(org.springframework.web.client.RestClientException) Lock(org.apache.jena.shared.Lock) ResourceCrawlUriMessage(won.matcher.service.crawler.msg.ResourceCrawlUriMessage) RestClientException(org.springframework.web.client.RestClientException)

Example 2 with NeedState

use of won.protocol.model.NeedState in project webofneeds by researchstudio-sat.

From the class LinkedDataWebController, method showNeedURIListPage:

/**
 * Web MVC controller method rendering the list of need URIs as an HTML page.
 * Supports positional paging via page number ("p"), resume-before and resume-after
 * need ids, and an optional need-state filter (ignored when no paging is requested).
 */
@RequestMapping("${uri.path.page.need}")
public String showNeedURIListPage(@RequestParam(value = "p", required = false) Integer page, @RequestParam(value = "resumebefore", required = false) String beforeId, @RequestParam(value = "resumeafter", required = false) String afterId, @RequestParam(value = "state", required = false) String state, HttpServletRequest request, Model model, HttpServletResponse response) throws IOException {
    NeedState filterState = getNeedState(state);
    Dataset needList;
    if (page != null) {
        // explicit page number requested
        needList = linkedDataService.listNeedURIs(page, null, filterState).getContent();
    } else if (beforeId != null) {
        // resume before the given need id
        URI refNeed = URI.create(this.needResourceURIPrefix + "/" + beforeId);
        needList = linkedDataService.listNeedURIsBefore(refNeed, null, filterState).getContent();
    } else if (afterId != null) {
        // resume after the given need id
        URI refNeed = URI.create(this.needResourceURIPrefix + "/" + afterId);
        needList = linkedDataService.listNeedURIsAfter(refNeed, null, filterState).getContent();
    } else {
        // no paging parameter at all: return all needs; need-state filtering
        // is not supported for clients that do not page
        needList = linkedDataService.listNeedURIs();
    }
    model.addAttribute("rdfDataset", needList);
    model.addAttribute("resourceURI", uriService.toResourceURIIfPossible(URI.create(request.getRequestURI())).toString());
    model.addAttribute("dataURI", uriService.toDataURIIfPossible(URI.create(request.getRequestURI())).toString());
    return "rdfDatasetView";
}
Also used : NeedInformationService(won.protocol.service.NeedInformationService) Dataset(org.apache.jena.query.Dataset) NeedState(won.protocol.model.NeedState) URI(java.net.URI) RequestMapping(org.springframework.web.bind.annotation.RequestMapping)

Example 3 with NeedState

use of won.protocol.model.NeedState in project webofneeds by researchstudio-sat.

From the class LinkedDataWebController, method listNeedURIs:

/**
 * Linked-data endpoint returning the list of need URIs as an RDF dataset.
 * Supports paging by page number ("p"), resume-before/resume-after need ids,
 * a modification-date filter ("modifiedafter", no paging), and an optional
 * need-state filter. Paged responses carry in-sequence paging headers.
 *
 * @return the RDF dataset plus paging/CORS/location headers, HTTP 200
 */
@RequestMapping(value = "${uri.path.data.need}", method = RequestMethod.GET, produces = { "application/ld+json", "application/trig", "application/n-quads" })
public ResponseEntity<Dataset> listNeedURIs(HttpServletRequest request, HttpServletResponse response, @RequestParam(value = "p", required = false) Integer page, @RequestParam(value = "resumebefore", required = false) String beforeId, @RequestParam(value = "resumeafter", required = false) String afterId, @RequestParam(value = "modifiedafter", required = false) String modifiedAfter, @RequestParam(value = "state", required = false) String state) throws IOException, ParseException {
    logger.debug("listNeedURIs() for page " + page + " called");
    HttpHeaders headers = new HttpHeaders();
    Integer preferredSize = getPreferredSize(request);
    String stateQuery = getPassableQueryMap("state", state);
    NeedState filterState = getNeedState(state);
    URI needListUri = URI.create(this.needResourceURIPrefix);
    Dataset dataset;
    if (preferredSize == null && modifiedAfter == null) {
        // client does not support paging - return all needs; need-state
        // filtering is not supported for clients that do not page
        dataset = linkedDataService.listNeedURIs();
    } else if (page == null && beforeId == null && afterId == null && modifiedAfter == null) {
        // no explicit position requested: return the latest needs (page 1)
        NeedInformationService.PagedResource<Dataset, URI> pagedNeeds = linkedDataService.listNeedURIs(1, preferredSize, filterState);
        dataset = pagedNeeds.getContent();
        addPagedResourceInSequenceHeader(headers, needListUri, pagedNeeds, stateQuery);
    } else if (page != null) {
        // explicit page number requested
        NeedInformationService.PagedResource<Dataset, URI> pagedNeeds = linkedDataService.listNeedURIs(page, preferredSize, filterState);
        dataset = pagedNeeds.getContent();
        addPagedResourceInSequenceHeader(headers, needListUri, pagedNeeds, page, stateQuery);
    } else if (beforeId != null) {
        // resume before the specified need id
        URI refNeed = URI.create(this.needResourceURIPrefix + "/" + beforeId);
        NeedInformationService.PagedResource<Dataset, URI> pagedNeeds = linkedDataService.listNeedURIsBefore(refNeed, preferredSize, filterState);
        dataset = pagedNeeds.getContent();
        addPagedResourceInSequenceHeader(headers, needListUri, pagedNeeds, stateQuery);
    } else if (afterId != null) {
        // resume after the specified need id
        URI refNeed = URI.create(this.needResourceURIPrefix + "/" + afterId);
        NeedInformationService.PagedResource<Dataset, URI> pagedNeeds = linkedDataService.listNeedURIsAfter(refNeed, preferredSize, filterState);
        dataset = pagedNeeds.getContent();
        addPagedResourceInSequenceHeader(headers, needListUri, pagedNeeds, stateQuery);
    } else {
        // only modifiedafter is set; paging is not supported for modified needs for now
        DateParameter modifiedDate = new DateParameter(modifiedAfter);
        dataset = linkedDataService.listModifiedNeedURIsAfter(modifiedDate.getDate());
    }
    addLocationHeaderIfNecessary(headers, URI.create(request.getRequestURI()), needListUri);
    addMutableResourceHeaders(headers);
    addCORSHeader(headers);
    return new ResponseEntity<>(dataset, headers, HttpStatus.OK);
}
Also used : NeedInformationService(won.protocol.service.NeedInformationService) HttpHeaders(org.springframework.http.HttpHeaders) ResponseEntity(org.springframework.http.ResponseEntity) Dataset(org.apache.jena.query.Dataset) NeedState(won.protocol.model.NeedState) URI(java.net.URI) RequestMapping(org.springframework.web.bind.annotation.RequestMapping)

Aggregations

Dataset (org.apache.jena.query.Dataset)3 NeedState (won.protocol.model.NeedState)3 URI (java.net.URI)2 HttpHeaders (org.springframework.http.HttpHeaders)2 RequestMapping (org.springframework.web.bind.annotation.RequestMapping)2 NeedInformationService (won.protocol.service.NeedInformationService)2 DistributedPubSubMediator (akka.cluster.pubsub.DistributedPubSubMediator)1 Lock (org.apache.jena.shared.Lock)1 ResponseEntity (org.springframework.http.ResponseEntity)1 RestClientException (org.springframework.web.client.RestClientException)1 NeedEvent (won.matcher.service.common.event.NeedEvent)1 CrawlWrapperException (won.matcher.service.crawler.exception.CrawlWrapperException)1 CrawlUriMessage (won.matcher.service.crawler.msg.CrawlUriMessage)1 ResourceCrawlUriMessage (won.matcher.service.crawler.msg.ResourceCrawlUriMessage)1 IncorrectPropertyCountException (won.protocol.exception.IncorrectPropertyCountException)1 DatasetResponseWithStatusCodeAndHeaders (won.protocol.rest.DatasetResponseWithStatusCodeAndHeaders)1 NeedModelWrapper (won.protocol.util.NeedModelWrapper)1