Search in sources:

Example 11 with AtomEvent

use of won.matcher.service.common.event.AtomEvent in project webofneeds by researchstudio-sat.

the class MatcherPubSubActor method onReceive.

/**
 * Dispatches the messages this pub/sub actor receives.
 * <ul>
 * <li>{@code TICK}: as long as no bulk answer has arrived yet, publishes a
 * {@link LoadAtomEvent} asking for the atom events that were missed.</li>
 * <li>{@link AtomEvent}: forwards the event to the matcher actor; once the update request
 * has been answered, the newest crawl date seen is tracked as well.</li>
 * <li>{@link BulkAtomEvent}: marks the update request as answered, then tracks and forwards
 * every contained atom event.</li>
 * <li>{@link HintEvent} / {@link BulkHintEvent}: published through the pub/sub mediator
 * under the event's class name.</li>
 * </ul>
 * Any other message is passed to {@code unhandled}.
 *
 * @param o the received message
 * @throws Exception if handling the message fails, e.g. when the stored last-seen date
 *                   cannot be parsed
 */
@Override
public void onReceive(Object o) throws Exception {
    if (o.equals(TICK)) {
        if (!atomsUpdateRequestReceived) {
            // request missing atom events from matching service while this matcher was not
            // available
            long lastSeenAtomDate = readLastSeenAtomDate();
            LoadAtomEvent loadAtomEvent;
            if (lastSeenAtomDate == -1) {
                // request the last one atom event from matching service and accept every atom
                // event timestamp
                loadAtomEvent = new LoadAtomEvent(1);
            } else {
                // request atom events with date > last atom event date
                log.info("request missed atoms from matching service with crawl date > {}", lastSeenAtomDate);
                loadAtomEvent = new LoadAtomEvent(lastSeenAtomDate, Long.MAX_VALUE);
            }
            pubSubMediator.tell(new DistributedPubSubMediator.Publish(loadAtomEvent.getClass().getName(), loadAtomEvent), getSelf());
        }
    } else if (o instanceof AtomEvent) {
        AtomEvent atomEvent = (AtomEvent) o;
        log.info("AtomEvent received: {}", atomEvent);
        // single events only advance the stored date after a bulk answer to the update
        // request has been received
        if (atomsUpdateRequestReceived) {
            rememberCrawlDateIfNewer(atomEvent);
        }
        matcherActor.tell(atomEvent, getSelf());
    } else if (o instanceof BulkAtomEvent) {
        // receiving a bulk atom event means this is the answer for the request of atom
        // updates
        // there could arrive several of these bulk events
        atomsUpdateRequestReceived = true;
        BulkAtomEvent bulkAtomEvent = (BulkAtomEvent) o;
        log.info("BulkAtomEvent received with {} atom events", bulkAtomEvent.getAtomEvents().size());
        for (AtomEvent atomEvent : bulkAtomEvent.getAtomEvents()) {
            rememberCrawlDateIfNewer(atomEvent);
            matcherActor.tell(atomEvent, getSelf());
        }
    } else if (o instanceof HintEvent) {
        HintEvent hintEvent = (HintEvent) o;
        log.info("Publish hint event: {}", hintEvent);
        pubSubMediator.tell(new DistributedPubSubMediator.Publish(hintEvent.getClass().getName(), hintEvent), getSelf());
    } else if (o instanceof BulkHintEvent) {
        BulkHintEvent bulkHintEvent = (BulkHintEvent) o;
        log.info("Publish bulk hint event: {}", bulkHintEvent);
        pubSubMediator.tell(new DistributedPubSubMediator.Publish(bulkHintEvent.getClass().getName(), bulkHintEvent), getSelf());
    } else {
        unhandled(o);
    }
}

/** Reads the last-seen atom date from the application state properties as a primitive long. */
private long readLastSeenAtomDate() {
    return Long.parseLong(appStateProps.getProperty(LAST_SEEN_ATOM_DATE_PROPERTY_NAME));
}

/** Stores and saves the event's crawl date if it is newer than the currently stored one. */
private void rememberCrawlDateIfNewer(AtomEvent atomEvent) throws Exception {
    if (atomEvent.getCrawlDate() > readLastSeenAtomDate()) {
        appStateProps.setProperty(LAST_SEEN_ATOM_DATE_PROPERTY_NAME, String.valueOf(atomEvent.getCrawlDate()));
        saveLastSeenAtomDate();
    }
}
Also used : BulkAtomEvent(won.matcher.service.common.event.BulkAtomEvent) BulkHintEvent(won.matcher.service.common.event.BulkHintEvent) DistributedPubSubMediator(akka.cluster.pubsub.DistributedPubSubMediator) BulkAtomEvent(won.matcher.service.common.event.BulkAtomEvent) LoadAtomEvent(won.matcher.service.common.event.LoadAtomEvent) AtomEvent(won.matcher.service.common.event.AtomEvent) LoadAtomEvent(won.matcher.service.common.event.LoadAtomEvent) HintEvent(won.matcher.service.common.event.HintEvent) BulkHintEvent(won.matcher.service.common.event.BulkHintEvent)

Example 12 with AtomEvent

use of won.matcher.service.common.event.AtomEvent in project webofneeds by researchstudio-sat.

the class RematchSparqlService method findAtomsForRematching.

/**
 * Queries the SPARQL endpoint for atoms that are due for rematching and packages them into
 * bulk events.
 * <p>
 * For every atom URI returned by the query the atom's data is fetched. If the data is an
 * atom, an ACTIVE {@link AtomEvent} carrying its TriG serialization is added. If fetching
 * fails with HTTP 410 (GONE), a DELETED event is added instead. For every fetch failure a
 * matching attempt is registered directly. A new bulk is started whenever the current one
 * holds at least {@code MAX_ATOMS_PER_REMATCH_BULK} events.
 *
 * @return the bulk events created during this run; the set always contains at least one
 *         bulk, and a bulk may be empty
 */
public Set<BulkAtomEvent> findAtomsForRematching() {
    logger.debug("searching atoms for rematching");
    StringBuilder builder = new StringBuilder();
    // Selects atomUris using a back-off strategy, each time doubling
    // the time difference to the reference date
    builder.append(" prefix won: <https://w3id.org/won/core#> \n");
    builder.append(" select distinct ?atomUri where {  \n");
    builder.append("    graph won:rematchMetadata { \n");
    builder.append("        ?atomUri won:referenceDate ?rDate ; \n");
    builder.append("                  won:matchAttemptDate ?mDate . \n");
    builder.append("         filter (?mDate >= ?rDate) \n");
    builder.append("         bind (?mDate - ?rDate as ?lastDiff) \n");
    builder.append("         bind (?now - ?rDate as ?diff) \n");
    builder.append("     } \n");
    builder.append("  \n");
    builder.append("     filter(?diff > 2 * ?lastDiff) \n");
    builder.append(" } \n");
    ParameterizedSparqlString pps = new ParameterizedSparqlString();
    pps.setNsPrefix("won", "https://w3id.org/won/core#");
    pps.setCommandText(builder.toString());
    pps.setLiteral("now", System.currentTimeMillis());
    Set<BulkAtomEvent> bulks = new HashSet<>();
    BulkAtomEvent bulkAtomEvent = new BulkAtomEvent();
    bulks.add(bulkAtomEvent);
    // counts events across ALL bulks so the summary log does not only report the last bulk
    int totalAtomEvents = 0;
    try (QueryExecution qexec = QueryExecutionFactory.sparqlService(sparqlEndpoint, pps.asQuery())) {
        ResultSet results = qexec.execSelect();
        while (results.hasNext()) {
            QuerySolution qs = results.nextSolution();
            String atomUri = qs.get("atomUri").asResource().getURI();
            try {
                logger.debug("Rematching {}, fetching its data...", atomUri);
                Dataset ds = linkedDataSource.getDataForPublicResource(URI.create(atomUri));
                if (AtomModelWrapper.isAAtom(ds)) {
                    StringWriter sw = new StringWriter();
                    RDFDataMgr.write(sw, ds, RDFFormat.TRIG.getLang());
                    AtomEvent atomEvent = new AtomEvent(atomUri, null, AtomEvent.TYPE.ACTIVE, System.currentTimeMillis(), sw.toString(), RDFFormat.TRIG.getLang(), Cause.SCHEDULED_FOR_REMATCH);
                    bulkAtomEvent.addAtomEvent(atomEvent);
                    totalAtomEvents++;
                }
            } catch (LinkedDataFetchingException e) {
                // compare the raw code instead of HttpStatus.valueOf(..), which throws
                // IllegalArgumentException for codes the enum does not know and would abort
                // the whole rematch run
                boolean gone = e.getStatusCode().isPresent()
                                && Integer.valueOf(HttpStatus.GONE.value()).equals(e.getStatusCode().get());
                if (gone) {
                    logger.debug("Rematching {}: got response status {}, removing resource from index", atomUri, HttpStatus.GONE);
                    // add the event indicating that the atom was deleted
                    bulkAtomEvent.addAtomEvent(new AtomEvent(atomUri, null, TYPE.DELETED, System.currentTimeMillis(), Cause.SCHEDULED_FOR_REMATCH));
                    totalAtomEvents++;
                } else {
                    logger.debug("Rematching {}: error retrieving linked data - not rematching at this time, will try again later", atomUri);
                }
                // register a matching attempt directly for every fetch failure
                // NOTE(review): this is also done (with TYPE.ACTIVE) after a GONE response,
                // as in the original code - confirm that this is intended
                registerMatchingAttempt(TYPE.ACTIVE, atomUri, Cause.SCHEDULED_FOR_REMATCH);
            }
            // roll over once per iteration so that DELETED events count towards the bulk
            // size limit as well
            if (bulkAtomEvent.getAtomEvents().size() >= MAX_ATOMS_PER_REMATCH_BULK) {
                bulkAtomEvent = new BulkAtomEvent();
                bulks.add(bulkAtomEvent);
            }
        }
    }
    logger.debug("atomEvents for rematching: {}", totalAtomEvents);
    return bulks;
}
Also used : HttpStatus(org.springframework.http.HttpStatus) BulkAtomEvent(won.matcher.service.common.event.BulkAtomEvent) StringWriter(java.io.StringWriter) BulkAtomEvent(won.matcher.service.common.event.BulkAtomEvent) AtomEvent(won.matcher.service.common.event.AtomEvent) LinkedDataFetchingException(won.protocol.rest.LinkedDataFetchingException) HashSet(java.util.HashSet)

Example 13 with AtomEvent

use of won.matcher.service.common.event.AtomEvent in project webofneeds by researchstudio-sat.

the class RematchSparqlService method createMatchAttemptUpdate.

/**
 * Convenience overload: takes event type, URI and cause from the given event and delegates
 * to the three-argument {@code createMatchAttemptUpdate}.
 *
 * @param msg the atom event to create the match attempt update for
 * @return whatever the three-argument overload returns for the event's values
 */
private Optional<String> createMatchAttemptUpdate(AtomEvent msg) {
    return createMatchAttemptUpdate(msg.getEventType(), msg.getUri(), msg.getCause());
}
Also used : BulkAtomEvent(won.matcher.service.common.event.BulkAtomEvent) AtomEvent(won.matcher.service.common.event.AtomEvent) Cause(won.matcher.service.common.event.Cause) TYPE(won.matcher.service.common.event.AtomEvent.TYPE)

Example 14 with AtomEvent

use of won.matcher.service.common.event.AtomEvent in project webofneeds by researchstudio-sat.

the class MatcherPubSubActor method onReceive.

/**
 * Dispatches the messages this pub/sub actor receives.
 * <ul>
 * <li>{@code TICK}: as long as no bulk answer has arrived yet, publishes a
 * {@link LoadAtomEvent} asking for the atom events that were missed.</li>
 * <li>{@link AtomEvent}: forwards the event to the matcher actor; once the update request
 * has been answered, the newest crawl date seen is tracked as well.</li>
 * <li>{@link BulkAtomEvent}: marks the update request as answered, then tracks and forwards
 * every contained atom event.</li>
 * <li>{@link HintEvent} / {@link BulkHintEvent}: published through the pub/sub mediator
 * under the event's class name.</li>
 * </ul>
 * Any other message is passed to {@code unhandled}.
 *
 * @param o the received message
 * @throws Exception if handling the message fails, e.g. when the stored last-seen date
 *                   cannot be parsed
 */
@Override
public void onReceive(Object o) throws Exception {
    if (o.equals(TICK)) {
        if (!atomsUpdateRequestReceived) {
            // request missing atom events from matching service while this matcher was not
            // available
            long lastSeenAtomDate = readLastSeenAtomDate();
            LoadAtomEvent loadAtomEvent;
            if (lastSeenAtomDate == -1) {
                // request the last one atom event from matching service and accept every atom
                // event timestamp
                loadAtomEvent = new LoadAtomEvent(1);
            } else {
                // request atom events with date > last atom event date
                log.info("request missed atoms from matching service with crawl date > {}", lastSeenAtomDate);
                loadAtomEvent = new LoadAtomEvent(lastSeenAtomDate, Long.MAX_VALUE);
            }
            pubSubMediator.tell(new DistributedPubSubMediator.Publish(loadAtomEvent.getClass().getName(), loadAtomEvent), getSelf());
        }
    } else if (o instanceof AtomEvent) {
        AtomEvent atomEvent = (AtomEvent) o;
        log.info("AtomEvent received: {}", atomEvent);
        // single events only advance the stored date after a bulk answer to the update
        // request has been received
        if (atomsUpdateRequestReceived) {
            rememberCrawlDateIfNewer(atomEvent);
        }
        matcherActor.tell(atomEvent, getSelf());
    } else if (o instanceof BulkAtomEvent) {
        // receiving a bulk atom event means this is the answer for the request of atom
        // updates
        // there could arrive several of these bulk events
        atomsUpdateRequestReceived = true;
        BulkAtomEvent bulkAtomEvent = (BulkAtomEvent) o;
        log.info("BulkAtomEvent received with {} atom events", bulkAtomEvent.getAtomEvents().size());
        for (AtomEvent atomEvent : bulkAtomEvent.getAtomEvents()) {
            rememberCrawlDateIfNewer(atomEvent);
            matcherActor.tell(atomEvent, getSelf());
        }
    } else if (o instanceof HintEvent) {
        HintEvent hintEvent = (HintEvent) o;
        log.info("Publish hint event: {}", hintEvent);
        pubSubMediator.tell(new DistributedPubSubMediator.Publish(hintEvent.getClass().getName(), hintEvent), getSelf());
    } else if (o instanceof BulkHintEvent) {
        BulkHintEvent bulkHintEvent = (BulkHintEvent) o;
        log.info("Publish bulk hint event: {}", bulkHintEvent);
        pubSubMediator.tell(new DistributedPubSubMediator.Publish(bulkHintEvent.getClass().getName(), bulkHintEvent), getSelf());
    } else {
        unhandled(o);
    }
}

/** Reads the last-seen atom date from the application state properties as a primitive long. */
private long readLastSeenAtomDate() {
    return Long.parseLong(appStateProps.getProperty(LAST_SEEN_ATOM_DATE_PROPERTY_NAME));
}

/** Stores and saves the event's crawl date if it is newer than the currently stored one. */
private void rememberCrawlDateIfNewer(AtomEvent atomEvent) throws Exception {
    if (atomEvent.getCrawlDate() > readLastSeenAtomDate()) {
        appStateProps.setProperty(LAST_SEEN_ATOM_DATE_PROPERTY_NAME, String.valueOf(atomEvent.getCrawlDate()));
        saveLastSeenAtomDate();
    }
}
Also used : BulkAtomEvent(won.matcher.service.common.event.BulkAtomEvent) BulkHintEvent(won.matcher.service.common.event.BulkHintEvent) DistributedPubSubMediator(akka.cluster.pubsub.DistributedPubSubMediator) BulkAtomEvent(won.matcher.service.common.event.BulkAtomEvent) LoadAtomEvent(won.matcher.service.common.event.LoadAtomEvent) AtomEvent(won.matcher.service.common.event.AtomEvent) LoadAtomEvent(won.matcher.service.common.event.LoadAtomEvent) HintEvent(won.matcher.service.common.event.HintEvent) BulkHintEvent(won.matcher.service.common.event.BulkHintEvent)

Aggregations

AtomEvent (won.matcher.service.common.event.AtomEvent)14 BulkAtomEvent (won.matcher.service.common.event.BulkAtomEvent)6 Dataset (org.apache.jena.query.Dataset)5 DistributedPubSubMediator (akka.cluster.pubsub.DistributedPubSubMediator)4 IOException (java.io.IOException)3 ActorRef (akka.actor.ActorRef)2 ActorSystem (akka.actor.ActorSystem)2 InputStream (java.io.InputStream)2 StringWriter (java.io.StringWriter)2 AnnotationConfigApplicationContext (org.springframework.context.annotation.AnnotationConfigApplicationContext)2 BulkHintEvent (won.matcher.service.common.event.BulkHintEvent)2 HintEvent (won.matcher.service.common.event.HintEvent)2 LoadAtomEvent (won.matcher.service.common.event.LoadAtomEvent)2 ResourceCrawlUriMessage (won.matcher.service.crawler.msg.ResourceCrawlUriMessage)2 LinkedDataFetchingException (won.protocol.rest.LinkedDataFetchingException)2 CamelMessage (akka.camel.CamelMessage)1 HashSet (java.util.HashSet)1 Lock (org.apache.jena.shared.Lock)1 SolrServerException (org.apache.solr.client.solrj.SolrServerException)1 HttpHeaders (org.springframework.http.HttpHeaders)1