Usage example of won.matcher.service.common.event.WonNodeEvent in the project webofneeds by researchstudio-sat: the method processWonNodeEvent of the class MasterCrawlerActor.
/**
 * Keeps the in-memory bookkeeping of won nodes up to date when events about
 * crawling or skipping certain won nodes occur.
 *
 * <ul>
 *   <li>{@code CONNECTED_TO_WON_NODE}: the node is moved from the skip set to the
 *       crawl set and a {@code START_CRAWLING_WON_NODE} event is scheduled to be
 *       sent to this actor after a 30 second delay, giving matcher implementations
 *       (e.g. solr matcher) time to connect to each other before crawling starts.</li>
 *   <li>{@code START_CRAWLING_WON_NODE}: crawling of the node is started.</li>
 *   <li>{@code SKIP_WON_NODE}: the node is moved from the crawl set to the skip set.</li>
 * </ul>
 *
 * @param event the won node event to process; its status determines the action taken
 * @throws InterruptedException declared for caller compatibility
 */
private void processWonNodeEvent(WonNodeEvent event) throws InterruptedException {
    // Note: enum constants are singletons, so identity comparison (==) is the
    // idiomatic, null-safe way to compare them (no NPE risk, no equals() call).
    if (event.getStatus() == WonNodeEvent.STATUS.CONNECTED_TO_WON_NODE) {
        // add the won node to the list of known nodes and start crawling it after 30 seconds
        // to give the matcher implementations (e.g. solr matcher) time to connect to each
        // other before the crawling starts
        log.info("added new won node to set of connected and crawling won nodes: {}", event.getWonNodeUri());
        skipWonNodeUris.remove(event.getWonNodeUri());
        crawlWonNodeUris.add(event.getWonNodeUri());
        WonNodeEvent startCrawlingEvent = new WonNodeEvent(
                event.getWonNodeUri(), WonNodeEvent.STATUS.START_CRAWLING_WON_NODE, event.getWonNodeInfo());
        getContext().system().scheduler().scheduleOnce(
                FiniteDuration.create(30, TimeUnit.SECONDS), getSelf(), startCrawlingEvent,
                getContext().dispatcher(), getSelf());
    } else if (event.getStatus() == WonNodeEvent.STATUS.START_CRAWLING_WON_NODE) {
        startCrawling(event.getWonNodeInfo());
    } else if (event.getStatus() == WonNodeEvent.STATUS.SKIP_WON_NODE) {
        // if we should skip this won node remove it from the known won node list
        // and add it to the skip list
        log.debug("skip crawling won node: {}", event.getWonNodeUri());
        crawlWonNodeUris.remove(event.getWonNodeUri());
        skipWonNodeUris.add(event.getWonNodeUri());
    }
    // all other statuses are intentionally ignored by this handler
}
Usage example of won.matcher.service.common.event.WonNodeEvent in the project webofneeds by researchstudio-sat: the method askWonNodeInfoForCrawling of the class MasterCrawlerActor.
/**
 * Asks for complete won node info of all known won nodes on the event bus in order
 * to initiate the crawling process again. Clears the caches of done/failed/pending
 * crawled uris so that they can be crawled again.
 *
 * <p>If too many messages are still pending from the previous crawl cycle
 * (more than {@code MIN_PENDING_MESSAGES_TO_SKIP_RECRAWLING}), recrawling is
 * skipped and will be retried after the configured recrawl interval.
 */
private void askWonNodeInfoForCrawling() {
    // Guard: do not start a new crawl cycle while the previous one is still busy.
    if (pendingMessages.size() > MIN_PENDING_MESSAGES_TO_SKIP_RECRAWLING) {
        // fixed typo "cylce" -> "cycle" and merged the needlessly split string literal
        log.warning("Skip crawling cycle since there are currently {} messages in the pending queue. "
                        + "Try to restart crawling again in {} minutes",
                pendingMessages.size(), config.getRecrawlIntervalDuration().toMinutes());
        return;
    }
    log.info("Start crawling process again. Clear the cached uris and crawling statistics");
    // Reset all crawl bookkeeping so every uri is eligible for crawling again.
    doneMessages.clear();
    failedMessages.clear();
    pendingMessages.clear();
    for (String wonNodeUri : crawlWonNodeUris) {
        log.info("ask for won node info of {}", wonNodeUri);
        WonNodeEvent event = new WonNodeEvent(wonNodeUri, WonNodeEvent.STATUS.GET_WON_NODE_INFO_FOR_CRAWLING);
        // Publish on the distributed pub-sub topic named after the event class.
        pubSubMediator.tell(new DistributedPubSubMediator.Publish(event.getClass().getName(), event), getSelf());
    }
}
Aggregations