Search in sources :

Example 6 with WonNodeEvent

use of won.matcher.service.common.event.WonNodeEvent in project webofneeds by researchstudio-sat.

the class MasterCrawlerActor method processWonNodeEvent.

/**
 * Reacts to a won node status event and updates the in-memory crawl/skip bookkeeping accordingly.
 * <ul>
 * <li>{@code CONNECTED_TO_WON_NODE}: the node uri is removed from {@code skipWonNodeUris}, added to
 * {@code crawlWonNodeUris}, and a {@code START_CRAWLING_WON_NODE} event is scheduled to be delivered
 * to this actor after 30 seconds.</li>
 * <li>{@code START_CRAWLING_WON_NODE}: crawling is started with the won node info carried by the event.</li>
 * <li>{@code SKIP_WON_NODE}: the node uri is removed from {@code crawlWonNodeUris} and added to
 * {@code skipWonNodeUris}.</li>
 * </ul>
 * Events with any other status are ignored.
 *
 * @param event the won node event to process
 * @throws InterruptedException part of the existing signature; presumably propagated from
 *         {@code startCrawling} (not visible here, confirm)
 */
private void processWonNodeEvent(WonNodeEvent event) throws InterruptedException {
    final WonNodeEvent.STATUS status = event.getStatus();

    if (status.equals(WonNodeEvent.STATUS.CONNECTED_TO_WON_NODE)) {
        final String connectedUri = event.getWonNodeUri();
        log.info("added new won node to set of connected and crawling won nodes: {}", connectedUri);
        skipWonNodeUris.remove(connectedUri);
        crawlWonNodeUris.add(connectedUri);

        // Do not crawl right away: the delay gives the matcher implementations (e.g. solr matcher)
        // time to connect to each other before the crawling starts.
        final FiniteDuration crawlDelay = FiniteDuration.create(30, TimeUnit.SECONDS);
        final WonNodeEvent delayedStart = new WonNodeEvent(
                connectedUri,
                WonNodeEvent.STATUS.START_CRAWLING_WON_NODE,
                event.getWonNodeInfo());
        getContext().system().scheduler().scheduleOnce(
                crawlDelay,
                getSelf(),
                delayedStart,
                getContext().dispatcher(),
                getSelf());
        return;
    }

    if (status.equals(WonNodeEvent.STATUS.START_CRAWLING_WON_NODE)) {
        startCrawling(event.getWonNodeInfo());
        return;
    }

    if (status.equals(WonNodeEvent.STATUS.SKIP_WON_NODE)) {
        // Move the node from the crawl bookkeeping to the skip bookkeeping.
        final String skippedUri = event.getWonNodeUri();
        log.debug("skip crawling won node: {}", skippedUri);
        crawlWonNodeUris.remove(skippedUri);
        skipWonNodeUris.add(skippedUri);
    }
}
Also used : WonNodeEvent(won.matcher.service.common.event.WonNodeEvent)

Example 7 with WonNodeEvent

use of won.matcher.service.common.event.WonNodeEvent in project webofneeds by researchstudio-sat.

the class MasterCrawlerActor method askWonNodeInfoForCrawling.

/**
 * Initiates a new crawling cycle by publishing, for every uri in {@code crawlWonNodeUris}, a
 * {@code GET_WON_NODE_INFO_FOR_CRAWLING} event through the pub-sub mediator.
 * <p>
 * The cycle is skipped with a warning while more than {@code MIN_PENDING_MESSAGES_TO_SKIP_RECRAWLING}
 * messages are still pending. Otherwise the done, failed and pending message collections are cleared
 * first, so that uris that were already handled can be crawled again.
 */
private void askWonNodeInfoForCrawling() {
    final boolean tooManyPending = pendingMessages.size() > MIN_PENDING_MESSAGES_TO_SKIP_RECRAWLING;
    if (tooManyPending) {
        log.warning(
                "Skip crawling cylce since there are currently {} messages in the pending queue. Try to restart "
                        + "crawling again in {} minutes",
                pendingMessages.size(),
                config.getRecrawlIntervalDuration().toMinutes());
        return;
    }

    log.info("Start crawling process again. Clear the cached uris and crawling statistics");
    doneMessages.clear();
    failedMessages.clear();
    pendingMessages.clear();

    for (String nodeUri : crawlWonNodeUris) {
        log.info("ask for won node info of {}", nodeUri);
        WonNodeEvent infoRequest = new WonNodeEvent(nodeUri, WonNodeEvent.STATUS.GET_WON_NODE_INFO_FOR_CRAWLING);
        // The event's class name is passed as the first argument of the publish message.
        String topic = infoRequest.getClass().getName();
        pubSubMediator.tell(new DistributedPubSubMediator.Publish(topic, infoRequest), getSelf());
    }
}
Also used : DistributedPubSubMediator(akka.cluster.pubsub.DistributedPubSubMediator) WonNodeEvent(won.matcher.service.common.event.WonNodeEvent)

Aggregations

WonNodeEvent (won.matcher.service.common.event.WonNodeEvent)7 DistributedPubSubMediator (akka.cluster.pubsub.DistributedPubSubMediator)5 WonNodeInfo (won.protocol.service.WonNodeInfo)2 BulkHintEvent (won.matcher.service.common.event.BulkHintEvent)1 HintEvent (won.matcher.service.common.event.HintEvent)1 CrawlUriMessage (won.matcher.service.crawler.msg.CrawlUriMessage)1 ResourceCrawlUriMessage (won.matcher.service.crawler.msg.ResourceCrawlUriMessage)1 WonNodeConnection (won.matcher.service.nodemanager.pojo.WonNodeConnection)1