Search in sources :

Example 1 with BulkAtomEvent

use of won.matcher.service.common.event.BulkAtomEvent in project webofneeds by researchstudio-sat.

the class SolrMatcherActor method onReceive.

/**
 * Dispatches an incoming actor message to the matching processing routine.
 * <ul>
 * <li>An {@code AtomEvent} of type {@code ACTIVE} is handed to
 * {@code processActiveAtomEvent}, one of type {@code INACTIVE} to
 * {@code processInactiveAtomEvent}; any other event type is reported through
 * {@code unhandled}.</li>
 * <li>For a {@code BulkAtomEvent}, every contained event is handed to
 * {@code processActiveAtomEvent}, regardless of its event type.</li>
 * <li>Every other message is reported through {@code unhandled}.</li>
 * </ul>
 * Exceptions thrown while processing are not propagated: they are logged on
 * info level (event type and atom URI) and with the stack trace on debug level.
 * When a bulk is processed, the first failing event ends the processing of that
 * bulk.
 *
 * @param o the message delivered to this actor
 * @throws Exception declared by the signature; exceptions raised inside the
 *                   dispatch are caught and logged by this method
 */
@Override
public void onReceive(final Object o) throws Exception {
    String eventTypeForLogging = "unknown";
    Optional<String> uriForLogging = Optional.empty();
    try {
        if (o instanceof AtomEvent) {
            eventTypeForLogging = "AtomEvent";
            AtomEvent atomEvent = (AtomEvent) o;
            uriForLogging = Optional.ofNullable(atomEvent.getUri());
            if (atomEvent.getEventType().equals(AtomEvent.TYPE.ACTIVE)) {
                processActiveAtomEvent(atomEvent);
            } else if (atomEvent.getEventType().equals(AtomEvent.TYPE.INACTIVE)) {
                processInactiveAtomEvent(atomEvent);
            } else {
                unhandled(o);
            }
        } else if (o instanceof BulkAtomEvent) {
            eventTypeForLogging = "BulkAtomEvent";
            BulkAtomEvent bulkAtomEvent = (BulkAtomEvent) o;
            log.info("received bulk atom event, processing {} atom events ...", bulkAtomEvent.getAtomEvents().size());
            for (AtomEvent event : bulkAtomEvent.getAtomEvents()) {
                // remember which atom is being processed so that a failure inside the
                // bulk is attributed to that atom in the log instead of "[no uri available]"
                uriForLogging = Optional.ofNullable(event.getUri());
                processActiveAtomEvent(event);
            }
        } else {
            eventTypeForLogging = "unhandled";
            unhandled(o);
        }
    } catch (Exception e) {
        // deliberately best-effort: a failing message must not take the actor down
        log.info(String.format("Caught exception when processing %s event %s. More info on loglevel 'debug'", eventTypeForLogging, uriForLogging.orElse("[no uri available]")));
        log.debug("caught exception", e);
    }
}
Also used : BulkAtomEvent(won.matcher.service.common.event.BulkAtomEvent) BulkAtomEvent(won.matcher.service.common.event.BulkAtomEvent) AtomEvent(won.matcher.service.common.event.AtomEvent) SolrServerException(org.apache.solr.client.solrj.SolrServerException) IOException(java.io.IOException)

Example 2 with BulkAtomEvent

use of won.matcher.service.common.event.BulkAtomEvent in project webofneeds by researchstudio-sat.

the class AtomEventLoaderActor method onReceive.

/**
 * Answers a {@code LoadAtomEvent} request by sending the requested atom events
 * back to the sender in bulks of at most {@code MAX_BULK_SIZE} events.
 * <p>
 * If the request's {@code getLastXAtomEvents()} is {@code -1}, the events of the
 * requested date range are loaded in ascending order; otherwise the last X
 * events are loaded in descending order. Empty bulks are not sent. Messages of
 * any other type are ignored.
 *
 * @param o the message delivered to this actor
 * @throws Throwable whatever the SPARQL service or the messaging calls throw
 */
@Override
public void onReceive(final Object o) throws Throwable {
    if (!(o instanceof LoadAtomEvent)) {
        return;
    }
    LoadAtomEvent msg = (LoadAtomEvent) o;
    log.debug("received request to load atoms events: {}", msg);
    final int lastXAtomEvents = msg.getLastXAtomEvents();
    // -1 marks a request for a time interval instead of the last X atom events
    final boolean loadByDateRange = lastXAtomEvents == -1;
    int offset = 0;
    int bulkSize;
    // NOTE(review): paging assumes that the size of the returned bulk equals the
    // number of rows the query consumed; if the service drops rows while building
    // the bulk, paging may stop early or the offset may drift - confirm against
    // the service implementation.
    do {
        BulkAtomEvent bulkAtomEvent;
        if (loadByDateRange) {
            bulkAtomEvent = sparqlService.retrieveActiveAtomEvents(msg.getFromDate(), msg.getToDate(), offset, MAX_BULK_SIZE, true);
        } else {
            bulkAtomEvent = sparqlService.retrieveActiveAtomEvents(0, Long.MAX_VALUE, offset, Math.min(MAX_BULK_SIZE, lastXAtomEvents - offset), false);
        }
        bulkSize = bulkAtomEvent.getAtomEvents().size();
        if (bulkSize > 0) {
            log.debug("send bulk event of size {} back to requesting actor", bulkSize);
            getSender().tell(bulkAtomEvent, getSelf());
            offset += bulkSize;
        }
        // a full bulk means there may be more data; in "last X" mode stop as soon as
        // X events were delivered instead of issuing one more query with LIMIT 0
    } while (bulkSize == MAX_BULK_SIZE && (loadByDateRange || offset < lastXAtomEvents));
}
Also used : BulkAtomEvent(won.matcher.service.common.event.BulkAtomEvent) LoadAtomEvent(won.matcher.service.common.event.LoadAtomEvent)

Example 3 with BulkAtomEvent

use of won.matcher.service.common.event.BulkAtomEvent in project webofneeds by researchstudio-sat.

the class CrawlSparqlService method retrieveActiveAtomEvents.

/**
 * Queries the SPARQL endpoint for active atoms with crawl status 'SAVE' or
 * 'DONE' whose crawl date lies in {@code [fromDate, toDate)} and wraps them in
 * one bulk event.
 * <p>
 * For every result row the atom's dataset is retrieved; rows whose dataset is
 * not recognized as an atom are skipped, so the bulk may contain fewer events
 * than the query returned rows.
 *
 * @param fromDate      lower bound of the crawl date (inclusive)
 * @param toDate        upper bound of the crawl date (exclusive)
 * @param offset        number of result rows to skip
 * @param limit         maximum number of result rows to fetch
 * @param sortAscending {@code true} to order by ascending crawl date,
 *                      {@code false} for descending order
 * @return a bulk event with one {@code ACTIVE} atom event per accepted row
 */
public BulkAtomEvent retrieveActiveAtomEvents(long fromDate, long toDate, int offset, int limit, boolean sortAscending) {
    logger.debug("bulk load atom data from sparql endpoint in date range: [{},{}]", fromDate, toDate);
    // the sort direction is the only variable part of the query text, everything
    // else is bound as a parameter below
    final String orderClause;
    if (sortAscending) {
        orderClause = "ORDER BY ?date\n";
    } else {
        orderClause = "ORDER BY DESC(?date)\n";
    }
    // query template to retrieve all active crawled/saved atoms in a certain date range
    final String queryTemplate = "SELECT ?atomUri ?wonNodeUri ?date WHERE {  \n"
                    + "  ?atomUri a won:Atom. \n"
                    + "  ?atomUri won:crawlDate ?date.  \n"
                    + "  ?atomUri won:atomState won:Active. \n"
                    + "  ?atomUri won:wonNode ?wonNodeUri. \n"
                    + "  {?atomUri won:crawlStatus 'SAVE'.} UNION {?atomUri won:crawlStatus 'DONE'.}\n"
                    + "  FILTER (?date >= ?fromDate && ?date < ?toDate ) \n"
                    + "} " + orderClause
                    + " OFFSET ?offset\n"
                    + " LIMIT ?limit";
    final ParameterizedSparqlString query = new ParameterizedSparqlString();
    query.setNsPrefix("won", "https://w3id.org/won/core#");
    query.setCommandText(queryTemplate);
    query.setLiteral("fromDate", fromDate);
    query.setLiteral("toDate", toDate);
    query.setLiteral("offset", offset);
    query.setLiteral("limit", limit);
    logger.debug("Query SPARQL Endpoint: {}", sparqlEndpoint);
    logger.debug("Execute query: {}", query.toString());
    try (QueryExecution execution = QueryExecutionFactory.sparqlService(sparqlEndpoint, query.asQuery())) {
        final ResultSet rows = execution.execSelect();
        // all accepted atoms are collected in a single bulk event
        final BulkAtomEvent bulk = new BulkAtomEvent();
        while (rows.hasNext()) {
            final QuerySolution row = rows.nextSolution();
            final String atomUri = row.get("atomUri").asResource().getURI();
            final String wonNodeUri = row.get("wonNodeUri").asResource().getURI();
            final long crawlDate = row.getLiteral("date").getLong();
            final Dataset atomData = retrieveAtomDataset(atomUri);
            if (!AtomModelWrapper.isAAtom(atomData)) {
                continue;
            }
            // the atom's data travels inside the event, serialized as TriG
            final StringWriter serialized = new StringWriter();
            RDFDataMgr.write(serialized, atomData, RDFFormat.TRIG.getLang());
            bulk.addAtomEvent(new AtomEvent(atomUri, wonNodeUri, AtomEvent.TYPE.ACTIVE, crawlDate, serialized.toString(), RDFFormat.TRIG.getLang(), Cause.CRAWLED));
        }
        logger.debug("number of atom events created: " + bulk.getAtomEvents().size());
        return bulk;
    }
}
Also used : BulkAtomEvent(won.matcher.service.common.event.BulkAtomEvent) StringWriter(java.io.StringWriter) BulkAtomEvent(won.matcher.service.common.event.BulkAtomEvent) AtomEvent(won.matcher.service.common.event.AtomEvent)

Example 4 with BulkAtomEvent

use of won.matcher.service.common.event.BulkAtomEvent in project webofneeds by researchstudio-sat.

the class MatcherPubSubActor method onReceive.

/**
 * Handles the messages of this pub/sub actor.
 * <ul>
 * <li>{@code TICK}: as long as no {@code BulkAtomEvent} has been received,
 * publishes a {@code LoadAtomEvent} through the pub/sub mediator to request the
 * atom events this matcher has not seen yet.</li>
 * <li>{@code AtomEvent}: forwarded to the matcher actor; once a bulk answer has
 * been received, its crawl date is also recorded as last seen atom date if it
 * is newer than the stored one.</li>
 * <li>{@code BulkAtomEvent}: marks the update request as answered, records the
 * crawl date of every contained event if newer and forwards each event to the
 * matcher actor.</li>
 * <li>{@code HintEvent} / {@code BulkHintEvent}: published through the pub/sub
 * mediator under the event's class name.</li>
 * <li>Everything else is reported through {@code unhandled}.</li>
 * </ul>
 *
 * @param o the message delivered to this actor
 * @throws Exception if the stored last seen atom date cannot be parsed or one
 *                   of the called operations fails
 */
@Override
public void onReceive(Object o) throws Exception {
    if (o.equals(TICK)) {
        if (!atomsUpdateRequestReceived) {
            // request missing atom events from matching service while this matcher was not
            // available
            long lastSeenAtomDate = Long.parseLong(appStateProps.getProperty(LAST_SEEN_ATOM_DATE_PROPERTY_NAME));
            LoadAtomEvent loadAtomEvent;
            if (lastSeenAtomDate == -1) {
                // request the last one atom event from matching service and accept every atom
                // event timestamp
                loadAtomEvent = new LoadAtomEvent(1);
            } else {
                // request atom events with date > last atom event date
                log.info("request missed atoms from matching service with crawl date > {}", lastSeenAtomDate);
                loadAtomEvent = new LoadAtomEvent(lastSeenAtomDate, Long.MAX_VALUE);
            }
            pubSubMediator.tell(new DistributedPubSubMediator.Publish(loadAtomEvent.getClass().getName(), loadAtomEvent), getSelf());
        }
    } else if (o instanceof AtomEvent) {
        AtomEvent atomEvent = (AtomEvent) o;
        log.info("AtomEvent received: {}", atomEvent);
        // the last seen atom date is only tracked after the matching service has
        // answered the update request
        if (atomsUpdateRequestReceived) {
            rememberCrawlDateIfNewer(atomEvent);
        }
        matcherActor.tell(atomEvent, getSelf());
    } else if (o instanceof BulkAtomEvent) {
        // receiving a bulk atom event means this is the answer for the request of atom
        // updates
        // there could arrive several of these bulk events
        atomsUpdateRequestReceived = true;
        BulkAtomEvent bulkAtomEvent = (BulkAtomEvent) o;
        log.info("BulkAtomEvent received with {} atom events", bulkAtomEvent.getAtomEvents().size());
        for (AtomEvent atomEvent : bulkAtomEvent.getAtomEvents()) {
            rememberCrawlDateIfNewer(atomEvent);
            matcherActor.tell(atomEvent, getSelf());
        }
    } else if (o instanceof HintEvent) {
        HintEvent hintEvent = (HintEvent) o;
        log.info("Publish hint event: {}", hintEvent);
        pubSubMediator.tell(new DistributedPubSubMediator.Publish(hintEvent.getClass().getName(), hintEvent), getSelf());
    } else if (o instanceof BulkHintEvent) {
        BulkHintEvent bulkHintEvent = (BulkHintEvent) o;
        log.info("Publish bulk hint event: {}", bulkHintEvent);
        pubSubMediator.tell(new DistributedPubSubMediator.Publish(bulkHintEvent.getClass().getName(), bulkHintEvent), getSelf());
    } else {
        unhandled(o);
    }
}

/**
 * Stores the crawl date of the given event as last seen atom date and saves
 * it, but only if it is newer than the date currently stored.
 *
 * @param atomEvent the event whose crawl date is compared with the stored date
 * @throws Exception if the stored date cannot be parsed or saving fails
 */
private void rememberCrawlDateIfNewer(AtomEvent atomEvent) throws Exception {
    long lastSeenAtomDate = Long.parseLong(appStateProps.getProperty(LAST_SEEN_ATOM_DATE_PROPERTY_NAME));
    if (atomEvent.getCrawlDate() > lastSeenAtomDate) {
        appStateProps.setProperty(LAST_SEEN_ATOM_DATE_PROPERTY_NAME, String.valueOf(atomEvent.getCrawlDate()));
        saveLastSeenAtomDate();
    }
}
Also used : BulkAtomEvent(won.matcher.service.common.event.BulkAtomEvent) BulkHintEvent(won.matcher.service.common.event.BulkHintEvent) DistributedPubSubMediator(akka.cluster.pubsub.DistributedPubSubMediator) BulkAtomEvent(won.matcher.service.common.event.BulkAtomEvent) LoadAtomEvent(won.matcher.service.common.event.LoadAtomEvent) AtomEvent(won.matcher.service.common.event.AtomEvent) LoadAtomEvent(won.matcher.service.common.event.LoadAtomEvent) HintEvent(won.matcher.service.common.event.HintEvent) BulkHintEvent(won.matcher.service.common.event.BulkHintEvent)

Example 5 with BulkAtomEvent

use of won.matcher.service.common.event.BulkAtomEvent in project webofneeds by researchstudio-sat.

the class RematchSparqlService method findAtomsForRematching.

/**
 * Finds the atoms that are due for rematching and packs them into bulk events.
 * <p>
 * The atom URIs are selected from the {@code won:rematchMetadata} graph: an
 * atom qualifies when the time elapsed since its reference date is more than
 * twice the span between its reference date and its last match attempt. For
 * each selected atom the linked data is fetched:
 * <ul>
 * <li>if it is an atom, an {@code ACTIVE} event carrying the data serialized as
 * TriG is added to the current bulk; once the bulk holds at least
 * {@code MAX_ATOMS_PER_REMATCH_BULK} events, a new bulk is started;</li>
 * <li>if fetching fails, a matching attempt is registered directly; when the
 * failure carries HTTP status 410 (GONE), a {@code DELETED} event is added to
 * the current bulk as well.</li>
 * </ul>
 *
 * @return the bulks created; the set always contains at least one bulk, and the
 *         most recently started bulk may be empty
 */
public Set<BulkAtomEvent> findAtomsForRematching() {
    logger.debug("searching atoms for rematching");
    StringBuilder builder = new StringBuilder();
    // Selects atomUris using a back-off strategy, each time doubling
    // the time difference to the reference date
    builder.append(" prefix won: <https://w3id.org/won/core#> \n");
    builder.append(" select distinct ?atomUri where {  \n");
    builder.append("    graph won:rematchMetadata { \n");
    builder.append("        ?atomUri won:referenceDate ?rDate ; \n");
    builder.append("                  won:matchAttemptDate ?mDate . \n");
    builder.append("         filter (?mDate >= ?rDate) \n");
    builder.append("         bind (?mDate - ?rDate as ?lastDiff) \n");
    builder.append("         bind (?now - ?rDate as ?diff) \n");
    builder.append("     } \n");
    builder.append("  \n");
    builder.append("     filter(?diff > 2 * ?lastDiff) \n");
    builder.append(" } \n");
    ParameterizedSparqlString pps = new ParameterizedSparqlString();
    pps.setNsPrefix("won", "https://w3id.org/won/core#");
    pps.setCommandText(builder.toString());
    pps.setLiteral("now", System.currentTimeMillis());
    Set<BulkAtomEvent> bulks = new HashSet<>();
    BulkAtomEvent bulkAtomEvent = new BulkAtomEvent();
    bulks.add(bulkAtomEvent);
    // counts the events over all bulks; the current bulk alone says nothing about
    // the total once a bulk has been rolled over
    int totalAtomEvents = 0;
    try (QueryExecution qexec = QueryExecutionFactory.sparqlService(sparqlEndpoint, pps.asQuery())) {
        ResultSet results = qexec.execSelect();
        // distribute the atoms over bulks of limited size
        while (results.hasNext()) {
            QuerySolution qs = results.nextSolution();
            String atomUri = qs.get("atomUri").asResource().getURI();
            try {
                logger.debug("Rematching {}, fetching its data...", atomUri);
                Dataset ds = linkedDataSource.getDataForPublicResource(URI.create(atomUri));
                if (AtomModelWrapper.isAAtom(ds)) {
                    StringWriter sw = new StringWriter();
                    RDFDataMgr.write(sw, ds, RDFFormat.TRIG.getLang());
                    AtomEvent atomEvent = new AtomEvent(atomUri, null, AtomEvent.TYPE.ACTIVE, System.currentTimeMillis(), sw.toString(), RDFFormat.TRIG.getLang(), Cause.SCHEDULED_FOR_REMATCH);
                    bulkAtomEvent.addAtomEvent(atomEvent);
                    totalAtomEvents++;
                    if (bulkAtomEvent.getAtomEvents().size() >= MAX_ATOMS_PER_REMATCH_BULK) {
                        bulkAtomEvent = new BulkAtomEvent();
                        bulks.add(bulkAtomEvent);
                    }
                }
            } catch (LinkedDataFetchingException e) {
                // compare the numeric code instead of converting it with
                // HttpStatus.valueOf(int): the conversion throws IllegalArgumentException
                // for codes the enum does not know, which would abort the whole run
                boolean gone = e.getStatusCode().isPresent() && e.getStatusCode().get() == HttpStatus.GONE.value();
                if (gone) {
                    logger.debug("Rematching {}: got response status {}, removing resource from index", atomUri, HttpStatus.GONE);
                    // add the event indicating that the atom was deleted
                    bulkAtomEvent.addAtomEvent(new AtomEvent(atomUri, null, TYPE.DELETED, System.currentTimeMillis(), Cause.SCHEDULED_FOR_REMATCH));
                    totalAtomEvents++;
                }
                // NOTE(review): the GONE case also falls through to the "try again later"
                // message and registers an ACTIVE attempt - confirm this is intended
                logger.debug("Rematching {}: error retrieving linked data - not rematching at this time, will try again later", atomUri);
                // do not add event, just directly register a matching attempt
                registerMatchingAttempt(TYPE.ACTIVE, atomUri, Cause.SCHEDULED_FOR_REMATCH);
            }
        }
    }
    logger.debug("atomEvents for rematching: {}", totalAtomEvents);
    return bulks;
}
Also used : HttpStatus(org.springframework.http.HttpStatus) BulkAtomEvent(won.matcher.service.common.event.BulkAtomEvent) StringWriter(java.io.StringWriter) BulkAtomEvent(won.matcher.service.common.event.BulkAtomEvent) AtomEvent(won.matcher.service.common.event.AtomEvent) LinkedDataFetchingException(won.protocol.rest.LinkedDataFetchingException) HashSet(java.util.HashSet)

Aggregations

BulkAtomEvent (won.matcher.service.common.event.BulkAtomEvent): 7; AtomEvent (won.matcher.service.common.event.AtomEvent): 6; StringWriter (java.io.StringWriter): 3; LoadAtomEvent (won.matcher.service.common.event.LoadAtomEvent): 3; DistributedPubSubMediator (akka.cluster.pubsub.DistributedPubSubMediator): 2; HashSet (java.util.HashSet): 2; HttpStatus (org.springframework.http.HttpStatus): 2; BulkHintEvent (won.matcher.service.common.event.BulkHintEvent): 2; HintEvent (won.matcher.service.common.event.HintEvent): 2; LinkedDataFetchingException (won.protocol.rest.LinkedDataFetchingException): 2; IOException (java.io.IOException): 1; MethodHandles (java.lang.invoke.MethodHandles): 1; URI (java.net.URI): 1; Optional (java.util.Optional): 1; Set (java.util.Set): 1; AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 1; Collectors (java.util.stream.Collectors): 1; org.apache.jena.query (org.apache.jena.query): 1; RDFDataMgr (org.apache.jena.riot.RDFDataMgr): 1; RDFFormat (org.apache.jena.riot.RDFFormat): 1