use of won.matcher.service.common.event.NeedEvent in project webofneeds by researchstudio-sat.
the class CrawlSparqlService method retrieveActiveNeedEvents.
public BulkNeedEvent retrieveActiveNeedEvents(long fromDate, long toDate, int offset, int limit, boolean sortAscending) {
// query template to retrieve all active crawled/saved needs in a certain date range
String orderClause = sortAscending ? "ORDER BY ?date\n" : "ORDER BY DESC(?date)\n";
log.debug("bulk load need data from sparql endpoint in date range: [{},{}]", fromDate, toDate);
String queryTemplate = "SELECT ?needUri ?wonNodeUri ?date WHERE { \n"
        + " ?needUri a won:Need. \n"
        + " ?needUri won:crawlDate ?date. \n"
        + " ?needUri won:isInState won:Active. \n"
        + " ?needUri won:hasWonNode ?wonNodeUri. \n"
        + " {?needUri won:crawlStatus 'SAVE'.} UNION {?needUri won:crawlStatus 'DONE'.}\n"
        + " FILTER (?date >= ?fromDate && ?date < ?toDate ) \n"
        + "} " + orderClause
        + " OFFSET ?offset\n"
        + " LIMIT ?limit";
ParameterizedSparqlString pps = new ParameterizedSparqlString();
pps.setNsPrefix("won", "http://purl.org/webofneeds/model#");
pps.setCommandText(queryTemplate);
pps.setLiteral("fromDate", fromDate);
pps.setLiteral("toDate", toDate);
pps.setLiteral("offset", offset);
pps.setLiteral("limit", limit);
log.debug("Query SPARQL Endpoint: {}", sparqlEndpoint);
log.debug("Execute query: {}", pps.toString());
QueryExecution qexec = QueryExecutionFactory.sparqlService(sparqlEndpoint, pps.asQuery());
ResultSet results = qexec.execSelect();
// load all the needs into one bulk need event
BulkNeedEvent bulkNeedEvent = new BulkNeedEvent();
while (results.hasNext()) {
QuerySolution qs = results.nextSolution();
String needUri = qs.get("needUri").asResource().getURI();
String wonNodeUri = qs.get("wonNodeUri").asResource().getURI();
long crawlDate = qs.getLiteral("date").getLong();
Dataset ds = retrieveNeedDataset(needUri);
StringWriter sw = new StringWriter();
RDFDataMgr.write(sw, ds, RDFFormat.TRIG.getLang());
NeedEvent needEvent = new NeedEvent(needUri, wonNodeUri, NeedEvent.TYPE.ACTIVE, crawlDate, sw.toString(), RDFFormat.TRIG.getLang());
bulkNeedEvent.addNeedEvent(needEvent);
}
qexec.close();
log.debug("number of need events created: " + bulkNeedEvent.getNeedEvents().size());
return bulkNeedEvent;
}
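A minimal caller sketch for this method, assuming the service is available as sparqlService, that getNeedEvents() returns a collection, and that NeedEvent exposes a getUri() getter (assumptions for illustration, not shown above); it pages through the active needs of a date range in ascending crawl-date order.
// hypothetical usage sketch: page through all active needs crawled in the last hour
long toDate = System.currentTimeMillis();
long fromDate = toDate - 60 * 60 * 1000L;
int pageSize = 100;
for (int offset = 0; ; offset += pageSize) {
    BulkNeedEvent page = sparqlService.retrieveActiveNeedEvents(fromDate, toDate, offset, pageSize, true);
    if (page.getNeedEvents().isEmpty()) {
        break; // no more active needs in this date range
    }
    for (NeedEvent event : page.getNeedEvents()) {
        log.debug("found active need {}", event.getUri()); // getUri() is an assumed getter
    }
}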
use of won.matcher.service.common.event.NeedEvent in project webofneeds by researchstudio-sat.
the class NeedConsumerProtocolActor method onReceive.
@Override
public void onReceive(final Object message) throws Exception {
if (message instanceof CamelMessage) {
CamelMessage camelMsg = (CamelMessage) message;
String needUri = (String) camelMsg.getHeaders().get(MSG_HEADER_NEED_URI);
String wonNodeUri = (String) camelMsg.getHeaders().get(MSG_HEADER_WON_NODE_URI);
// monitoring code
monitoringService.startClock(MonitoringService.NEED_HINT_STOPWATCH, needUri);
// process the incoming need event
if (needUri != null && wonNodeUri != null) {
Object methodName = camelMsg.getHeaders().get(MSG_HEADER_METHODNAME);
if (methodName != null) {
log.debug("Received event '{}' for needUri '{}' and wonNeedUri '{}' and publish it to matchers", methodName, needUri, wonNodeUri);
// publish a need event to all the (distributed) matchers
NeedEvent event = null;
long crawlDate = System.currentTimeMillis();
if (methodName.equals(MSG_HEADER_METHODNAME_NEEDCREATED)) {
event = new NeedEvent(needUri, wonNodeUri, NeedEvent.TYPE.ACTIVE, crawlDate, camelMsg.body().toString(), Lang.TRIG);
pubSubMediator.tell(new DistributedPubSubMediator.Publish(event.getClass().getName(), event), getSelf());
} else if (methodName.equals(MSG_HEADER_METHODNAME_NEEDACTIVATED)) {
event = new NeedEvent(needUri, wonNodeUri, NeedEvent.TYPE.ACTIVE, crawlDate, camelMsg.body().toString(), Lang.TRIG);
pubSubMediator.tell(new DistributedPubSubMediator.Publish(event.getClass().getName(), event), getSelf());
} else if (methodName.equals(MSG_HEADER_METHODNAME_NEEDDEACTIVATED)) {
event = new NeedEvent(needUri, wonNodeUri, NeedEvent.TYPE.INACTIVE, crawlDate, camelMsg.body().toString(), Lang.TRIG);
pubSubMediator.tell(new DistributedPubSubMediator.Publish(event.getClass().getName(), event), getSelf());
} else {
unhandled(message);
}
// let the crawler save the data of this event too
ResourceCrawlUriMessage resMsg = new ResourceCrawlUriMessage(needUri, needUri, wonNodeUri, CrawlUriMessage.STATUS.SAVE, crawlDate, null);
resMsg.setSerializedResource(camelMsg.body().toString());
resMsg.setSerializationFormat(Lang.TRIG);
pubSubMediator.tell(new DistributedPubSubMediator.Publish(resMsg.getClass().getName(), resMsg), getSelf());
return;
}
}
}
unhandled(message);
}
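The Publish calls above use the event's class name as the topic, so a matcher receives these events by subscribing to exactly that topic. A minimal subscriber sketch under that assumption (this actor is illustrative, not part of the project):
import akka.actor.ActorRef;
import akka.actor.UntypedActor;
import akka.cluster.pubsub.DistributedPubSub;
import akka.cluster.pubsub.DistributedPubSubMediator;
import won.matcher.service.common.event.NeedEvent;

// hypothetical matcher-side actor subscribing to the NeedEvent topic published above
public class NeedEventSubscriberActor extends UntypedActor {

    @Override
    public void preStart() {
        ActorRef mediator = DistributedPubSub.get(getContext().system()).mediator();
        // the topic must match the one used by the Publish calls: the class name of NeedEvent
        mediator.tell(new DistributedPubSubMediator.Subscribe(NeedEvent.class.getName(), getSelf()), getSelf());
    }

    @Override
    public void onReceive(Object message) throws Exception {
        if (message instanceof NeedEvent) {
            NeedEvent needEvent = (NeedEvent) message;
            // hand the need event over to the matching logic here
        } else if (message instanceof DistributedPubSubMediator.SubscribeAck) {
            // the subscription to the topic has been acknowledged
        } else {
            unhandled(message);
        }
    }
}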
use of won.matcher.service.common.event.NeedEvent in project webofneeds by researchstudio-sat.
the class SaveNeedEventActor method onReceive.
@Override
public void onReceive(final Object o) throws Exception {
if (o instanceof NeedEvent) {
NeedEvent needEvent = (NeedEvent) o;
// save the need
log.debug("Save need event {} to sparql endpoint {}", needEvent, sparqlService.getSparqlEndpoint());
Dataset ds = needEvent.deserializeNeedDataset();
sparqlService.updateNamedGraphsOfDataset(ds);
} else {
unhandled(o);
}
}
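deserializeNeedDataset() is not shown here; since the events in the previous examples carry their payload as a serialized TriG string, a plausible sketch of such a deserialization with plain Jena could look like this (an assumption for illustration, not the project's actual implementation):
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import org.apache.jena.query.Dataset;
import org.apache.jena.query.DatasetFactory;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFDataMgr;

// hypothetical sketch: turn a serialized RDF payload back into a Jena Dataset
public static Dataset deserializeDataset(String serializedResource, Lang format) {
    InputStream is = new ByteArrayInputStream(serializedResource.getBytes(StandardCharsets.UTF_8));
    Dataset dataset = DatasetFactory.create();
    RDFDataMgr.read(dataset, is, format); // parses the named graphs, e.g. TriG, into the dataset
    return dataset;
}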
use of won.matcher.service.common.event.NeedEvent in project webofneeds by researchstudio-sat.
the class WorkerCrawlerActor method crawlUri.
private void crawlUri(CrawlUriMessage uriMsg) {
Dataset ds = null;
List<String> etags = null;
Lock lock = null;
try {
// check if resource is already downloaded
if (uriMsg instanceof ResourceCrawlUriMessage) {
ResourceCrawlUriMessage resMsg = ((ResourceCrawlUriMessage) uriMsg);
if (resMsg.getSerializedResource() != null && resMsg.getSerializationFormat() != null) {
// TODO: this should be optimized, why deserialize the resource here when we just want to save it in the RDF
// store? How to insert this serialized resource into the SPARQL endpoint?
ds = SparqlService.deserializeDataset(resMsg.getSerializedResource(), resMsg.getSerializationFormat());
}
}
// download resource if not already downloaded
if (ds == null) {
// use ETag/If-None-Match Headers to make the process more efficient
HttpHeaders httpHeaders = new HttpHeaders();
if (uriMsg.getResourceETagHeaderValues() != null && !uriMsg.getResourceETagHeaderValues().isEmpty()) {
String ifNoneMatchHeaderValue = StringUtils.collectionToDelimitedString(uriMsg.getResourceETagHeaderValues(), ", ");
httpHeaders.add("If-None-Match", ifNoneMatchHeaderValue);
}
DatasetResponseWithStatusCodeAndHeaders datasetWithHeaders = linkedDataSource.getDatasetWithHeadersForResource(URI.create(uriMsg.getUri()), httpHeaders);
ds = datasetWithHeaders.getDataset();
etags = datasetWithHeaders.getResponseHeaders().get("ETag");
// if dataset was not modified (304) we can treat the current crawl uri as done
if (ds == null && datasetWithHeaders.getStatusCode() == 304) {
sendDoneUriMessage(uriMsg, uriMsg.getWonNodeUri(), etags);
return;
}
// if there is paging activated and the won node tells us that there is more data (previous link)
// to be downloaded, then we add this link to the crawling process too
String prevLink = linkedDataSource.getPreviousLinkFromDatasetWithHeaders(datasetWithHeaders);
if (prevLink != null) {
CrawlUriMessage newUriMsg = new CrawlUriMessage(uriMsg.getBaseUri(), prevLink, uriMsg.getWonNodeUri(), CrawlUriMessage.STATUS.PROCESS, System.currentTimeMillis(), null);
getSender().tell(newUriMsg, getSelf());
}
}
lock = ds == null ? null : ds.getLock();
lock.enterCriticalSection(true);
// Save dataset to triple store
sparqlService.updateNamedGraphsOfDataset(ds);
String wonNodeUri = extractWonNodeUri(ds, uriMsg.getUri());
if (wonNodeUri == null) {
wonNodeUri = uriMsg.getWonNodeUri();
}
// do nothing more here if the STATUS of the message was SAVE
if (uriMsg.getStatus().equals(CrawlUriMessage.STATUS.SAVE)) {
log.debug("processed crawl uri event {} with status 'SAVE'", uriMsg);
return;
}
// extract URIs from current resource and send extracted URI messages back to sender
log.debug("Extract URIs from message {}", uriMsg);
Set<CrawlUriMessage> newCrawlMessages = sparqlService.extractCrawlUriMessages(uriMsg.getBaseUri(), wonNodeUri);
for (CrawlUriMessage newMsg : newCrawlMessages) {
getSender().tell(newMsg, getSelf());
}
// signal sender that this URI is processed and save meta data about crawling the URI.
// This needs to be done after all extracted URI messages have been sent to guarantee consistency
// in case of failure
sendDoneUriMessage(uriMsg, wonNodeUri, etags);
// if this URI/dataset was a need, then send an event to the distributed event bus
if (NeedModelWrapper.isANeed(ds)) {
NeedModelWrapper needModelWrapper = new NeedModelWrapper(ds, false);
NeedState state = needModelWrapper.getNeedState();
NeedEvent.TYPE type = state.equals(NeedState.ACTIVE) ? NeedEvent.TYPE.ACTIVE : NeedEvent.TYPE.INACTIVE;
log.debug("Created need event for need uri {}", uriMsg.getUri());
long crawlDate = System.currentTimeMillis();
NeedEvent needEvent = new NeedEvent(uriMsg.getUri(), wonNodeUri, type, crawlDate, ds);
pubSubMediator.tell(new DistributedPubSubMediator.Publish(needEvent.getClass().getName(), needEvent), getSelf());
}
} catch (RestClientException e1) {
// usually happens if the fetch of the dataset fails e.g. HttpServerErrorException, HttpClientErrorException
log.debug("Exception during crawling: " + e1);
throw new CrawlWrapperException(e1, uriMsg);
} catch (Exception e) {
log.debug("Exception during crawling: " + e);
throw new CrawlWrapperException(e, uriMsg);
} finally {
if (lock != null) {
lock.leaveCriticalSection();
}
}
}
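The dataset is guarded with Jena's Lock before it is written to the triple store; the boolean passed to enterCriticalSection(true) above corresponds to Jena's Lock.READ constant. The same idiom written out with the named constants (a general Jena pattern, not project code):
import org.apache.jena.query.Dataset;
import org.apache.jena.shared.Lock;

// general Jena locking idiom: Lock.READ == true, Lock.WRITE == false
void readDatasetSafely(Dataset ds) {
    Lock lock = ds.getLock();
    lock.enterCriticalSection(Lock.READ); // equivalent to enterCriticalSection(true) above
    try {
        // read from the dataset here, e.g. serialize it or run a query against it
    } finally {
        lock.leaveCriticalSection();
    }
}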
use of won.matcher.service.common.event.NeedEvent in project webofneeds by researchstudio-sat.
the class SolrTest method createNeedEvent.
private static NeedEvent createNeedEvent(String path) throws IOException {
InputStream is = null;
Dataset dataset = null;
try {
try {
is = SolrTest.class.getResourceAsStream(path);
dataset = DatasetFactory.create();
RDFDataMgr.read(dataset, is, RDFFormat.TRIG.getLang());
} finally {
if (is != null) {
is.close();
}
}
} catch (IOException e) {
System.err.println(e);
return null;
}
String needUri = WonRdfUtils.NeedUtils.getNeedURI(dataset).toString();
return new NeedEvent(needUri, "no_uri", NeedEvent.TYPE.ACTIVE, System.currentTimeMillis(), dataset);
}
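A usage sketch for this helper in a test method (the resource path is made up for illustration):
// hypothetical test usage: load a need from a classpath TriG file and feed it to the component under test
NeedEvent needEvent = createNeedEvent("/needmodel/need1.trig"); // the path is an assumption
if (needEvent != null) {
    // pass the event on, e.g. publish it or index it in the Solr-based matcher under test
}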