use of won.matcher.service.common.event.BulkAtomEvent in project webofneeds by researchstudio-sat.
the class SolrMatcherActor method onReceive.
/**
 * Dispatches an incoming actor message to the matching logic.
 * <ul>
 * <li>{@code AtomEvent}: ACTIVE events go to {@code processActiveAtomEvent}, INACTIVE events
 * to {@code processInactiveAtomEvent}; any other event type is reported as unhandled.</li>
 * <li>{@code BulkAtomEvent}: every contained event is passed to
 * {@code processActiveAtomEvent}, in iteration order.</li>
 * <li>Anything else is reported as unhandled.</li>
 * </ul>
 * Exceptions raised while processing are caught and logged here; they are not propagated.
 * An exception inside a bulk aborts the remaining events of that bulk.
 *
 * @param o the message delivered to this actor
 */
@Override
public void onReceive(final Object o) throws Exception {
    String eventTypeForLogging = "unknown";
    Optional<String> uriForLogging = Optional.empty();
    try {
        if (o instanceof AtomEvent) {
            eventTypeForLogging = "AtomEvent";
            AtomEvent atomEvent = (AtomEvent) o;
            uriForLogging = Optional.ofNullable(atomEvent.getUri());
            if (atomEvent.getEventType().equals(AtomEvent.TYPE.ACTIVE)) {
                processActiveAtomEvent(atomEvent);
            } else if (atomEvent.getEventType().equals(AtomEvent.TYPE.INACTIVE)) {
                processInactiveAtomEvent(atomEvent);
            } else {
                unhandled(o);
            }
        } else if (o instanceof BulkAtomEvent) {
            eventTypeForLogging = "BulkAtomEvent";
            BulkAtomEvent bulkAtomEvent = (BulkAtomEvent) o;
            log.info("received bulk atom event, processing {} atom events ...", bulkAtomEvent.getAtomEvents().size());
            for (AtomEvent event : bulkAtomEvent.getAtomEvents()) {
                // Remember which atom is being processed so that a failure inside the
                // bulk is logged with the offending URI instead of "[no uri available]".
                uriForLogging = Optional.ofNullable(event.getUri());
                processActiveAtomEvent(event);
            }
        } else {
            eventTypeForLogging = "unhandled";
            unhandled(o);
        }
    } catch (Exception e) {
        log.info("Caught exception when processing {} event {}. More info on loglevel 'debug'", eventTypeForLogging, uriForLogging.orElse("[no uri available]"));
        // NOTE(review): if `log` is an Akka LoggingAdapter, the throwable passed here is
        // treated as a template argument rather than as a cause, so no stack trace would
        // be printed - confirm against the logger type declared in this class.
        log.debug("caught exception", e);
    }
}
use of won.matcher.service.common.event.BulkAtomEvent in project webofneeds by researchstudio-sat.
the class AtomEventLoaderActor method onReceive.
/**
 * Answers a {@code LoadAtomEvent} request by sending the matching atom events back to the
 * sender in pages of at most {@code MAX_BULK_SIZE} events.
 * <p>
 * If {@code getLastXAtomEvents()} is -1, the request's date interval is loaded in ascending
 * order; otherwise the last X atom events are loaded in descending order. Paging continues
 * as long as a page comes back completely filled. Empty pages are not sent.
 * <p>
 * Messages of any other type are reported via {@code unhandled}.
 * <p>
 * NOTE(review): the offset advances by the number of events in the returned bulk. If the
 * service drops rows after querying, paging may stop early or overlap - confirm.
 *
 * @param o the message delivered to this actor
 */
@Override
public void onReceive(final Object o) throws Throwable {
    if (!(o instanceof LoadAtomEvent)) {
        unhandled(o);
        return;
    }
    LoadAtomEvent msg = (LoadAtomEvent) o;
    log.debug("received request to load atoms events: {}", msg);
    // -1 marks a request for a time interval; any other value asks for the last X events
    final boolean loadByInterval = msg.getLastXAtomEvents() == -1;
    int offset = 0;
    int pageSize;
    do {
        BulkAtomEvent bulkAtomEvent;
        if (loadByInterval) {
            bulkAtomEvent = sparqlService.retrieveActiveAtomEvents(msg.getFromDate(), msg.getToDate(), offset, MAX_BULK_SIZE, true);
        } else {
            int remaining = msg.getLastXAtomEvents() - offset;
            if (remaining <= 0) {
                // Everything requested has been delivered (or nothing valid was
                // requested): do not issue a query with a zero or negative LIMIT.
                break;
            }
            bulkAtomEvent = sparqlService.retrieveActiveAtomEvents(0, Long.MAX_VALUE, offset, Math.min(MAX_BULK_SIZE, remaining), false);
        }
        pageSize = bulkAtomEvent.getAtomEvents().size();
        if (pageSize > 0) {
            log.debug("send bulk event of size {} back to requesting actor", pageSize);
            getSender().tell(bulkAtomEvent, getSelf());
            offset += pageSize;
        }
        // a completely filled page means there may be more data to fetch
    } while (pageSize == MAX_BULK_SIZE);
}
use of won.matcher.service.common.event.BulkAtomEvent in project webofneeds by researchstudio-sat.
the class CrawlSparqlService method retrieveActiveAtomEvents.
/**
 * Loads active atoms with crawl status 'SAVE' or 'DONE' from the SPARQL endpoint and wraps
 * them into a single bulk event.
 * <p>
 * The query selects atoms whose crawl date satisfies {@code fromDate <= date < toDate},
 * ordered by crawl date, and applies the given offset and limit. For each result row the
 * atom's dataset is retrieved; rows whose dataset is not recognised as an atom are skipped,
 * so the returned bulk may hold fewer events than the query returned rows.
 *
 * @param fromDate      lower bound of the crawl date (inclusive)
 * @param toDate        upper bound of the crawl date (exclusive)
 * @param offset        number of result rows to skip
 * @param limit         maximum number of result rows to fetch
 * @param sortAscending {@code true} to order by ascending crawl date, {@code false} for descending
 * @return a bulk event holding one ACTIVE atom event per accepted row
 */
public BulkAtomEvent retrieveActiveAtomEvents(long fromDate, long toDate, int offset, int limit, boolean sortAscending) {
    final String ordering;
    if (sortAscending) {
        ordering = "ORDER BY ?date\n";
    } else {
        ordering = "ORDER BY DESC(?date)\n";
    }
    logger.debug("bulk load atom data from sparql endpoint in date range: [{},{}]", fromDate, toDate);
    // query template to retrieve all active crawled/saved atoms in a certain date range
    StringBuilder template = new StringBuilder();
    template.append("SELECT ?atomUri ?wonNodeUri ?date WHERE { \n");
    template.append(" ?atomUri a won:Atom. \n");
    template.append(" ?atomUri won:crawlDate ?date. \n");
    template.append(" ?atomUri won:atomState won:Active. \n");
    template.append(" ?atomUri won:wonNode ?wonNodeUri. \n");
    template.append(" {?atomUri won:crawlStatus 'SAVE'.} UNION {?atomUri won:crawlStatus 'DONE'.}\n");
    template.append(" FILTER (?date >= ?fromDate && ?date < ?toDate ) \n");
    template.append("} ");
    template.append(ordering);
    template.append(" OFFSET ?offset\n");
    template.append(" LIMIT ?limit");
    ParameterizedSparqlString sparql = new ParameterizedSparqlString();
    sparql.setNsPrefix("won", "https://w3id.org/won/core#");
    sparql.setCommandText(template.toString());
    sparql.setLiteral("fromDate", fromDate);
    sparql.setLiteral("toDate", toDate);
    sparql.setLiteral("offset", offset);
    sparql.setLiteral("limit", limit);
    logger.debug("Query SPARQL Endpoint: {}", sparqlEndpoint);
    logger.debug("Execute query: {}", sparql.toString());
    try (QueryExecution execution = QueryExecutionFactory.sparqlService(sparqlEndpoint, sparql.asQuery())) {
        ResultSet rows = execution.execSelect();
        // all atoms found are collected into one bulk atom event
        BulkAtomEvent collected = new BulkAtomEvent();
        while (rows.hasNext()) {
            QuerySolution row = rows.nextSolution();
            String atomUri = row.get("atomUri").asResource().getURI();
            String wonNodeUri = row.get("wonNodeUri").asResource().getURI();
            long crawlDate = row.getLiteral("date").getLong();
            Dataset atomData = retrieveAtomDataset(atomUri);
            if (!AtomModelWrapper.isAAtom(atomData)) {
                // not an atom: leave this row out of the bulk
                continue;
            }
            StringWriter serialized = new StringWriter();
            RDFDataMgr.write(serialized, atomData, RDFFormat.TRIG.getLang());
            collected.addAtomEvent(new AtomEvent(atomUri, wonNodeUri, AtomEvent.TYPE.ACTIVE, crawlDate, serialized.toString(), RDFFormat.TRIG.getLang(), Cause.CRAWLED));
        }
        logger.debug("number of atom events created: " + collected.getAtomEvents().size());
        return collected;
    }
}
use of won.matcher.service.common.event.BulkAtomEvent in project webofneeds by researchstudio-sat.
the class MatcherPubSubActor method onReceive.
/**
 * Mediates between the pub/sub channel of the matching service and the local matcher actor.
 * <ul>
 * <li>{@code TICK}: while no bulk answer has been received yet, publishes a
 * {@code LoadAtomEvent} asking for missed atom events - either the single most recent one
 * (stored last-seen date is -1) or everything from the stored last-seen date onwards.</li>
 * <li>{@code AtomEvent}: forwards the event to the matcher actor. Once a bulk answer has
 * been received, the stored last-seen date is advanced and saved if the event is newer.</li>
 * <li>{@code BulkAtomEvent}: marks the update request as answered, forwards every contained
 * event to the matcher actor, and advances and saves the stored last-seen date once, to the
 * newest crawl date found in the bulk.</li>
 * <li>{@code HintEvent} / {@code BulkHintEvent}: published on the pub/sub mediator under
 * the event's class name.</li>
 * <li>Anything else is reported as unhandled.</li>
 * </ul>
 *
 * @param o the message delivered to this actor
 * @throws NumberFormatException if the stored last-seen date is missing or not a number
 */
@Override
public void onReceive(Object o) throws Exception {
    if (o.equals(TICK)) {
        if (!atomsUpdateRequestReceived) {
            // request missing atom events from matching service while this matcher was not
            // available
            long lastSeenAtomDate = Long.parseLong(appStateProps.getProperty(LAST_SEEN_ATOM_DATE_PROPERTY_NAME));
            LoadAtomEvent loadAtomEvent;
            if (lastSeenAtomDate == -1) {
                // request the last one atom event from matching service and accept every atom
                // event timestamp
                loadAtomEvent = new LoadAtomEvent(1);
            } else {
                // request atom events starting at the last seen atom event date
                log.info("request missed atoms from matching service with crawl date > {}", lastSeenAtomDate);
                loadAtomEvent = new LoadAtomEvent(lastSeenAtomDate, Long.MAX_VALUE);
            }
            pubSubMediator.tell(new DistributedPubSubMediator.Publish(loadAtomEvent.getClass().getName(), loadAtomEvent), getSelf());
        }
    } else if (o instanceof AtomEvent) {
        AtomEvent atomEvent = (AtomEvent) o;
        log.info("AtomEvent received: " + atomEvent);
        // only move the last-seen date forward after the update request has been answered
        if (atomsUpdateRequestReceived) {
            long lastSeenAtomDate = Long.parseLong(appStateProps.getProperty(LAST_SEEN_ATOM_DATE_PROPERTY_NAME));
            if (atomEvent.getCrawlDate() > lastSeenAtomDate) {
                appStateProps.setProperty(LAST_SEEN_ATOM_DATE_PROPERTY_NAME, String.valueOf(atomEvent.getCrawlDate()));
                saveLastSeenAtomDate();
            }
        }
        matcherActor.tell(atomEvent, getSelf());
    } else if (o instanceof BulkAtomEvent) {
        // receiving a bulk atom event means this is the answer for the request of atom
        // updates; there could arrive several of these bulk events
        atomsUpdateRequestReceived = true;
        BulkAtomEvent bulkAtomEvent = (BulkAtomEvent) o;
        log.info("BulkAtomEvent received with {} atom events", bulkAtomEvent.getAtomEvents().size());
        long lastSeenAtomDate = Long.parseLong(appStateProps.getProperty(LAST_SEEN_ATOM_DATE_PROPERTY_NAME));
        long newestCrawlDate = lastSeenAtomDate;
        for (AtomEvent atomEvent : bulkAtomEvent.getAtomEvents()) {
            newestCrawlDate = Math.max(newestCrawlDate, atomEvent.getCrawlDate());
            matcherActor.tell(atomEvent, getSelf());
        }
        // Persist once per bulk instead of once per event: the stored value ends up the
        // same, without re-parsing and re-saving the property for every atom.
        if (newestCrawlDate > lastSeenAtomDate) {
            appStateProps.setProperty(LAST_SEEN_ATOM_DATE_PROPERTY_NAME, String.valueOf(newestCrawlDate));
            saveLastSeenAtomDate();
        }
    } else if (o instanceof HintEvent) {
        HintEvent hintEvent = (HintEvent) o;
        log.info("Publish hint event: " + hintEvent);
        pubSubMediator.tell(new DistributedPubSubMediator.Publish(hintEvent.getClass().getName(), hintEvent), getSelf());
    } else if (o instanceof BulkHintEvent) {
        BulkHintEvent bulkHintEvent = (BulkHintEvent) o;
        log.info("Publish bulk hint event: " + bulkHintEvent);
        pubSubMediator.tell(new DistributedPubSubMediator.Publish(bulkHintEvent.getClass().getName(), bulkHintEvent), getSelf());
    } else {
        unhandled(o);
    }
}
use of won.matcher.service.common.event.BulkAtomEvent in project webofneeds by researchstudio-sat.
the class RematchSparqlService method findAtomsForRematching.
/**
 * Finds the atoms that are due for rematching and packages them into bulk events.
 * <p>
 * Atoms are selected from the {@code won:rematchMetadata} graph with a back-off strategy:
 * an atom is due when the time since its reference date is more than twice the time that
 * lay between the reference date and its last match attempt.
 * <p>
 * For every selected atom its linked data is fetched. If the data is an atom, an ACTIVE
 * event is added. If fetching fails with HTTP status GONE, a DELETED event is added. On any
 * fetch failure a matching attempt is registered directly so the atom is retried later.
 * A new bulk is started whenever the current one reaches
 * {@code MAX_ATOMS_PER_REMATCH_BULK} events.
 *
 * @return the bulk events created; always contains at least one bulk, and the last one may
 *         be empty
 */
public Set<BulkAtomEvent> findAtomsForRematching() {
    logger.debug("searching atoms for rematching");
    StringBuilder builder = new StringBuilder();
    // Selects atomUris using a back-off strategy, each time doubling
    // the time difference to the reference date
    builder.append(" prefix won: <https://w3id.org/won/core#> \n");
    builder.append(" select distinct ?atomUri where { \n");
    builder.append(" graph won:rematchMetadata { \n");
    builder.append(" ?atomUri won:referenceDate ?rDate ; \n");
    builder.append(" won:matchAttemptDate ?mDate . \n");
    builder.append(" filter (?mDate >= ?rDate) \n");
    builder.append(" bind (?mDate - ?rDate as ?lastDiff) \n");
    builder.append(" bind (?now - ?rDate as ?diff) \n");
    builder.append(" } \n");
    builder.append(" \n");
    builder.append(" filter(?diff > 2 * ?lastDiff) \n");
    builder.append(" } \n");
    ParameterizedSparqlString pps = new ParameterizedSparqlString();
    pps.setNsPrefix("won", "https://w3id.org/won/core#");
    pps.setCommandText(builder.toString());
    pps.setLiteral("now", System.currentTimeMillis());
    Set<BulkAtomEvent> bulks = new HashSet<>();
    BulkAtomEvent bulkAtomEvent = new BulkAtomEvent();
    bulks.add(bulkAtomEvent);
    // counts events across all bulks, so the summary log reflects the real total
    int totalEvents = 0;
    try (QueryExecution qexec = QueryExecutionFactory.sparqlService(sparqlEndpoint, pps.asQuery())) {
        ResultSet results = qexec.execSelect();
        // distribute the atoms over bulk atom events of bounded size
        while (results.hasNext()) {
            QuerySolution qs = results.nextSolution();
            String atomUri = qs.get("atomUri").asResource().getURI();
            try {
                logger.debug("Rematching {}, fetching its data...", atomUri);
                Dataset ds = linkedDataSource.getDataForPublicResource(URI.create(atomUri));
                if (AtomModelWrapper.isAAtom(ds)) {
                    StringWriter sw = new StringWriter();
                    RDFDataMgr.write(sw, ds, RDFFormat.TRIG.getLang());
                    AtomEvent atomEvent = new AtomEvent(atomUri, null, AtomEvent.TYPE.ACTIVE, System.currentTimeMillis(), sw.toString(), RDFFormat.TRIG.getLang(), Cause.SCHEDULED_FOR_REMATCH);
                    bulkAtomEvent.addAtomEvent(atomEvent);
                    totalEvents++;
                    bulkAtomEvent = startNewBulkIfFull(bulks, bulkAtomEvent);
                }
            } catch (LinkedDataFetchingException e) {
                if (e.getStatusCode().isPresent()) {
                    HttpStatus status = HttpStatus.valueOf(e.getStatusCode().get());
                    if (status == HttpStatus.GONE) {
                        logger.debug("Rematching {}: got response status {}, removing resource from index", atomUri, status);
                        // add the event indicating that the atom was deleted
                        bulkAtomEvent.addAtomEvent(new AtomEvent(atomUri, null, TYPE.DELETED, System.currentTimeMillis(), Cause.SCHEDULED_FOR_REMATCH));
                        totalEvents++;
                        // DELETED events count towards the bulk size limit as well
                        bulkAtomEvent = startNewBulkIfFull(bulks, bulkAtomEvent);
                    }
                }
                logger.debug("Rematching {}: error retrieving linked data - not rematching at this time, will try again later", atomUri);
                // do not add event, just directly register a matching attempt
                registerMatchingAttempt(TYPE.ACTIVE, atomUri, Cause.SCHEDULED_FOR_REMATCH);
            }
        }
    }
    logger.debug("atomEvents for rematching: {}", totalEvents);
    return bulks;
}

/**
 * Returns the bulk that should receive the next event: {@code current} while it is below
 * the size limit, otherwise a fresh bulk that has been registered in {@code bulks}.
 */
private BulkAtomEvent startNewBulkIfFull(Set<BulkAtomEvent> bulks, BulkAtomEvent current) {
    if (current.getAtomEvents().size() < MAX_ATOMS_PER_REMATCH_BULK) {
        return current;
    }
    BulkAtomEvent next = new BulkAtomEvent();
    bulks.add(next);
    return next;
}
Aggregations