use of won.matcher.service.common.event.WonNodeEvent in project webofneeds by researchstudio-sat.
the class WonNodeControllerActor method onReceive.
/**
 * Dispatches every message delivered to this actor.
 * <ul>
 * <li>{@code Terminated} messages are handed to {@code handleConnectionErrors}.</li>
 * <li>The {@code LIFE_CHECK_TICK} message triggers {@code lifeCheck()}.</li>
 * <li>A {@code WonNodeEvent} with status {@code NEW_WON_NODE_DISCOVERED},
 * {@code GET_WON_NODE_INFO_FOR_CRAWLING} or {@code RETRY_REGISTER_FAILED_WON_NODE}
 * decides whether the won node is answered from the known connections, skipped,
 * suppressed because it failed before, or newly connected.</li>
 * <li>{@code HintEvent} and {@code BulkHintEvent} messages are passed to
 * {@code processHint}, one hint at a time.</li>
 * </ul>
 * Everything else, including a {@code WonNodeEvent} with any other status, ends
 * up in {@code unhandled}.
 *
 * @param message the message received by this actor
 */
@Override
public void onReceive(final Object message) {
    if (message instanceof Terminated) {
        handleConnectionErrors((Terminated) message);
        return;
    }
    if (message.equals(LIFE_CHECK_TICK)) {
        lifeCheck();
        return;
    }
    if (message instanceof WonNodeEvent) {
        final WonNodeEvent event = (WonNodeEvent) message;
        final WonNodeEvent.STATUS status = event.getStatus();
        final boolean infoRequested = status.equals(WonNodeEvent.STATUS.GET_WON_NODE_INFO_FOR_CRAWLING);
        final boolean retryAfterFailure = status.equals(WonNodeEvent.STATUS.RETRY_REGISTER_FAILED_WON_NODE);
        if (infoRequested || retryAfterFailure || status.equals(WonNodeEvent.STATUS.NEW_WON_NODE_DISCOVERED)) {
            final String wonNodeUri = event.getWonNodeUri();
            // the won node is already known: only answer explicit info requests
            if (crawlWonNodes.containsKey(wonNodeUri)) {
                log.debug("Won node uri '{}' already discovered", wonNodeUri);
                if (infoRequested) {
                    final WonNodeInfo knownInfo = crawlWonNodes.get(wonNodeUri).getWonNodeInfo();
                    final WonNodeEvent connected = new WonNodeEvent(wonNodeUri, WonNodeEvent.STATUS.CONNECTED_TO_WON_NODE, knownInfo);
                    pubSubMediator.tell(new DistributedPubSubMediator.Publish(WonNodeEvent.class.getName(), connected), getSelf());
                }
                return;
            }
            // won nodes on the skip list are never crawled; announce that they are skipped
            if (skipWonNodeUris.contains(wonNodeUri)) {
                log.debug("Skip crawling won node with uri '{}'", wonNodeUri);
                final WonNodeEvent skipped = new WonNodeEvent(wonNodeUri, WonNodeEvent.STATUS.SKIP_WON_NODE);
                pubSubMediator.tell(new DistributedPubSubMediator.Publish(WonNodeEvent.class.getName(), skipped), getSelf());
                return;
            }
            // a won node that is still marked as failed is not contacted again here
            if (failedWonNodeUris.contains(wonNodeUri)) {
                log.debug("Suppress connection to already failed won node with uri {} , will try to connect later ...", wonNodeUri);
                return;
            }
            // attempt the connection; the flag is only set for retry events
            final WonNodeConnection connection = addWonNodeForCrawling(wonNodeUri, retryAfterFailure);
            // presumably addWonNodeForCrawling records the uri in failedWonNodeUris
            // when the connection attempt fails - verify against its implementation
            if (failedWonNodeUris.contains(wonNodeUri)) {
                log.debug("Still could not connect to won node with uri: {}, will retry later ...", wonNodeUri);
                return;
            }
            final WonNodeInfo connectedInfo = (connection == null) ? null : connection.getWonNodeInfo();
            if (connectedInfo == null) {
                log.error("Cannot retrieve won node info from won node connection!");
                return;
            }
            // announce the successfully connected won node together with its info
            final WonNodeEvent connected = new WonNodeEvent(wonNodeUri, WonNodeEvent.STATUS.CONNECTED_TO_WON_NODE, connectedInfo);
            pubSubMediator.tell(new DistributedPubSubMediator.Publish(WonNodeEvent.class.getName(), connected), getSelf());
            return;
        }
    }
    // hints are processed one by one, whether they arrive alone or in bulk
    if (message instanceof HintEvent) {
        processHint((HintEvent) message);
        return;
    }
    if (message instanceof BulkHintEvent) {
        for (final HintEvent singleHint : ((BulkHintEvent) message).getHintEvents()) {
            processHint(singleHint);
        }
        return;
    }
    unhandled(message);
}
use of won.matcher.service.common.event.WonNodeEvent in project webofneeds by researchstudio-sat.
the class WonNodeControllerActor method preStart.
/**
 * Prepares the actor: schedules the recurring {@code LIFE_CHECK_TICK}, subscribes
 * to won node and hint events, loads the skip list from the configuration,
 * publishes a {@code NEW_WON_NODE_DISCOVERED} event for every won node found in
 * the RDF store or listed in the configuration, and finally creates the crawler
 * and the need event save child actors.
 * <p>
 * If the won node infos cannot be retrieved from the SPARQL endpoint, the JVM is
 * terminated with exit code -1.
 */
@Override
public void preStart() {
    // send LIFE_CHECK_TICK to this actor repeatedly, using the configured duration
    // both as initial delay and as interval
    getContext().system().scheduler().schedule(config.getLifeCheckDuration(), config.getLifeCheckDuration(), getSelf(), LIFE_CHECK_TICK, getContext().dispatcher(), null);
    // subscribe to won node events first, then to single and bulk hint events
    pubSubMediator = DistributedPubSub.get(getContext().system()).mediator();
    final Class<?>[] subscribedTopics = { WonNodeEvent.class, HintEvent.class, BulkHintEvent.class };
    for (final Class<?> topic : subscribedTopics) {
        pubSubMediator.tell(new DistributedPubSubMediator.Subscribe(topic.getName(), getSelf()), getSelf());
    }
    // won nodes that the configuration excludes from crawling
    skipWonNodeUris.addAll(config.getSkipWonNodes());
    // load all won nodes already known to the RDF store
    Set<WonNodeInfo> knownWonNodes = new HashSet<>();
    try {
        knownWonNodes = sparqlService.retrieveAllWonNodeInfo();
    } catch (Exception ex) {
        log.error("Error querying SPARQL endpoint {}. SPARQL endpoint must be running at matcher service startup!", sparqlService.getSparqlEndpoint());
        log.error("Exception was: {}", ex);
        log.info("Shut down matcher service!");
        System.exit(-1);
    }
    // re-announce the stored won nodes so they get registered again at startup;
    // nodes that are also configured are announced by the loop further below
    for (final WonNodeInfo knownNode : knownWonNodes) {
        if (config.getCrawlWonNodes().contains(knownNode.getWonNodeURI())) {
            continue;
        }
        final WonNodeEvent discovered = new WonNodeEvent(knownNode.getWonNodeURI(), WonNodeEvent.STATUS.NEW_WON_NODE_DISCOVERED);
        pubSubMediator.tell(new DistributedPubSubMediator.Publish(WonNodeEvent.class.getName(), discovered), getSelf());
    }
    // announce the configured won nodes unless they are skipped or already connected
    for (final String configuredUri : config.getCrawlWonNodes()) {
        if (skipWonNodeUris.contains(configuredUri) || crawlWonNodes.containsKey(configuredUri)) {
            continue;
        }
        final WonNodeEvent discovered = new WonNodeEvent(configuredUri, WonNodeEvent.STATUS.NEW_WON_NODE_DISCOVERED);
        pubSubMediator.tell(new DistributedPubSubMediator.Publish(WonNodeEvent.class.getName(), discovered), getSelf());
    }
    // child actor that performs the crawling
    crawler = getContext().actorOf(SpringExtension.SpringExtProvider.get(getContext().system()).props(MasterCrawlerActor.class), "MasterCrawlerActor");
    // child actor that saves need events
    saveNeedActor = getContext().actorOf(SpringExtension.SpringExtProvider.get(getContext().system()).props(SaveNeedEventActor.class), "SaveNeedEventActor");
}
use of won.matcher.service.common.event.WonNodeEvent in project webofneeds by researchstudio-sat.
the class WonNodeControllerActor method lifeCheck.
/**
 * Gives every won node that is currently marked as failed another chance: the
 * failed uris are copied, the failed set is emptied, and one
 * {@code RETRY_REGISTER_FAILED_WON_NODE} event per copied uri is published.
 */
private void lifeCheck() {
    // copy first, because the set is emptied before the retry events are sent
    final List<String> urisToRetry = IteratorUtils.toList(failedWonNodeUris.iterator());
    failedWonNodeUris.clear();
    log.debug("retry to connect to all failed won nodes again: {}", urisToRetry);
    for (final String failedUri : urisToRetry) {
        // ask for a new registration attempt at this won node
        final WonNodeEvent retry = new WonNodeEvent(failedUri, WonNodeEvent.STATUS.RETRY_REGISTER_FAILED_WON_NODE);
        pubSubMediator.tell(new DistributedPubSubMediator.Publish(WonNodeEvent.class.getName(), retry), getSelf());
    }
}
use of won.matcher.service.common.event.WonNodeEvent in project webofneeds by researchstudio-sat.
the class MasterCrawlerActor method onReceive.
/**
 * Routes an incoming message to its handler: the {@code RECRAWL_TICK} message
 * triggers {@code askWonNodeInfoForCrawling()}, a {@link WonNodeEvent} goes to
 * {@code processWonNodeEvent}, and a
 * {@link won.matcher.service.crawler.msg.CrawlUriMessage} goes to
 * {@code processCrawlUriMessage}. Any other message is reported as unhandled.
 *
 * @param message the message received by this actor
 * @throws InterruptedException declared by the signature; not thrown directly by this method
 */
@Override
public void onReceive(final Object message) throws InterruptedException {
    if (message.equals(RECRAWL_TICK)) {
        askWonNodeInfoForCrawling();
        return;
    }
    if (message instanceof WonNodeEvent) {
        processWonNodeEvent((WonNodeEvent) message);
        return;
    }
    if (message instanceof CrawlUriMessage) {
        processCrawlUriMessage((CrawlUriMessage) message);
        // report the pending count after the message has been handled
        log.debug("Number of pending messages: {}", pendingMessages.size());
        return;
    }
    unhandled(message);
}
use of won.matcher.service.common.event.WonNodeEvent in project webofneeds by researchstudio-sat.
the class MasterCrawlerActor method processCrawlUriMessage.
/**
 * Handles a crawl message according to its status and keeps the pending, done
 * and failed bookkeeping up to date.
 * <ul>
 * <li>{@code DONE}: the message moves from pending to done.</li>
 * <li>{@code FAILED}: the message moves from pending to failed.</li>
 * <li>{@code PROCESS} / {@code SAVE}: a message whose uri is already pending,
 * done or failed is dropped. Otherwise, if its won node is newly discovered, a
 * {@code NEW_WON_NODE_DISCOVERED} event is published and the message is
 * re-sent to this actor after {@code RESCHEDULE_MESSAGE_DURATION}; if not, it
 * is handed to the crawling worker unless its won node is on the skip list.</li>
 * </ul>
 * Every message that is not dropped is also forwarded to the meta data worker.
 * Messages with any other status are only logged.
 *
 * @param msg the crawl message to process
 */
private void processCrawlUriMessage(CrawlUriMessage msg) {
    log.debug("Process message: {}", msg);
    final CrawlUriMessage.STATUS status = msg.getStatus();

    if (status.equals(CrawlUriMessage.STATUS.DONE)) {
        // the URI was crawled successfully
        log.debug("Successfully processed URI: {}", msg.getUri());
        updateMetaDataWorker.tell(msg, getSelf());
        pendingMessages.remove(msg.getUri());
        final boolean seenBefore = doneMessages.put(msg.getUri(), msg) != null;
        if (seenBefore) {
            log.warning("URI message received twice: {}", msg.getUri());
        }
        logStatus();
        return;
    }

    if (status.equals(CrawlUriMessage.STATUS.FAILED)) {
        // crawling the URI did not succeed
        log.debug("Crawling URI failed: {}", msg.getUri());
        updateMetaDataWorker.tell(msg, getSelf());
        pendingMessages.remove(msg.getUri());
        failedMessages.put(msg.getUri(), msg);
        logStatus();
        return;
    }

    final boolean toBeCrawled = status.equals(CrawlUriMessage.STATUS.PROCESS) || status.equals(CrawlUriMessage.STATUS.SAVE);
    if (!toBeCrawled) {
        return;
    }

    // drop messages whose URI is already being processed or was processed before
    final boolean alreadyTracked = pendingMessages.get(msg.getUri()) != null
            || doneMessages.get(msg.getUri()) != null
            || failedMessages.get(msg.getUri()) != null;
    if (alreadyTracked) {
        log.debug("message {} already processing/processed ...", msg);
        return;
    }

    updateMetaDataWorker.tell(msg, getSelf());
    final String wonNodeUri = msg.getWonNodeUri();
    if (discoveredNewWonNode(wonNodeUri)) {
        // announce the new won node and send this message to ourselves again later
        log.debug("discovered new won node {}", wonNodeUri);
        final WonNodeEvent discovered = new WonNodeEvent(wonNodeUri, WonNodeEvent.STATUS.NEW_WON_NODE_DISCOVERED);
        pubSubMediator.tell(new DistributedPubSubMediator.Publish(WonNodeEvent.class.getName(), discovered), getSelf());
        getContext().system().scheduler().scheduleOnce(RESCHEDULE_MESSAGE_DURATION, getSelf(), msg, getContext().dispatcher(), null);
        return;
    }
    if (!skipWonNodeUris.contains(wonNodeUri)) {
        // known won node that is not skipped: remember the message and crawl it
        pendingMessages.put(msg.getUri(), msg);
        crawlingWorker.tell(msg, getSelf());
    }
}
Aggregations