use of won.matcher.service.common.event.HintEvent in project webofneeds by researchstudio-sat.
the class WonNodeControllerActor method onReceive.
/**
 * Dispatches the messages this actor receives.
 * <ul>
 * <li>{@link Terminated}: forwarded to {@code handleConnectionErrors}.</li>
 * <li>{@code LIFE_CHECK_TICK}: triggers {@code lifeCheck}.</li>
 * <li>{@link WonNodeEvent} with status {@code NEW_WON_NODE_DISCOVERED},
 * {@code GET_WON_NODE_INFO_FOR_CRAWLING} or {@code RETRY_REGISTER_FAILED_WON_NODE}:
 * decides whether the won node is crawled, skipped or retried later.</li>
 * <li>{@link HintEvent} / {@link BulkHintEvent}: every hint is passed to {@code processHint}.</li>
 * </ul>
 * Everything else (including won node events with any other status) ends up in {@code unhandled}.
 *
 * @param message the message delivered to this actor
 */
@Override
public void onReceive(final Object message) {
    if (message instanceof Terminated) {
        handleConnectionErrors((Terminated) message);
        return;
    }
    if (message.equals(LIFE_CHECK_TICK)) {
        lifeCheck();
        return;
    }
    if (message instanceof WonNodeEvent) {
        final WonNodeEvent request = (WonNodeEvent) message;
        final WonNodeEvent.STATUS status = request.getStatus();
        final boolean isConnectRequest = status.equals(WonNodeEvent.STATUS.NEW_WON_NODE_DISCOVERED)
                || status.equals(WonNodeEvent.STATUS.GET_WON_NODE_INFO_FOR_CRAWLING)
                || status.equals(WonNodeEvent.STATUS.RETRY_REGISTER_FAILED_WON_NODE);
        if (isConnectRequest) {
            handleWonNodeConnectRequest(request, status);
            return;
        }
        // won node events with any other status fall through to the checks below
    }
    // send back hints to won nodes
    if (message instanceof HintEvent) {
        processHint((HintEvent) message);
        return;
    }
    if (message instanceof BulkHintEvent) {
        for (HintEvent singleHint : ((BulkHintEvent) message).getHintEvents()) {
            processHint(singleHint);
        }
        return;
    }
    unhandled(message);
}

/**
 * Handles a won node event that asks for a (re-)connection or for the won node info:
 * answers from the already known connections, honours the skip list and the list of
 * failed won nodes, and otherwise tries to connect and announces the result.
 *
 * @param request the received won node event
 * @param status the status of {@code request}
 */
private void handleWonNodeConnectRequest(final WonNodeEvent request, final WonNodeEvent.STATUS status) {
    // already discovered and connected: only answer explicit info requests
    if (crawlWonNodes.containsKey(request.getWonNodeUri())) {
        log.debug("Won node uri '{}' already discovered", request.getWonNodeUri());
        if (status.equals(WonNodeEvent.STATUS.GET_WON_NODE_INFO_FOR_CRAWLING)) {
            WonNodeInfo knownInfo = crawlWonNodes.get(request.getWonNodeUri()).getWonNodeInfo();
            publishWonNodeEvent(new WonNodeEvent(request.getWonNodeUri(),
                    WonNodeEvent.STATUS.CONNECTED_TO_WON_NODE, knownInfo));
        }
        return;
    }
    // won nodes on the skip list are never crawled
    if (skipWonNodeUris.contains(request.getWonNodeUri())) {
        log.debug("Skip crawling won node with uri '{}'", request.getWonNodeUri());
        publishWonNodeEvent(new WonNodeEvent(request.getWonNodeUri(), WonNodeEvent.STATUS.SKIP_WON_NODE));
        return;
    }
    // a previous connection attempt has failed already: do not try again right now
    if (failedWonNodeUris.contains(request.getWonNodeUri())) {
        log.debug("Suppress connection to already failed won node with uri {} , will try to connect later ...", request.getWonNodeUri());
        return;
    }
    // try to connect to the won node
    boolean logRegisterWarning = status.equals(WonNodeEvent.STATUS.RETRY_REGISTER_FAILED_WON_NODE);
    WonNodeConnection connection = addWonNodeForCrawling(request.getWonNodeUri(), logRegisterWarning);
    // the connection attempt itself may have put the won node on the failed list
    if (failedWonNodeUris.contains(request.getWonNodeUri())) {
        log.debug("Still could not connect to won node with uri: {}, will retry later ...", request.getWonNodeUri());
        return;
    }
    if (connection == null || connection.getWonNodeInfo() == null) {
        log.error("Cannot retrieve won node info from won node connection!");
        return;
    }
    // tell the crawler about the discovered won node
    publishWonNodeEvent(new WonNodeEvent(request.getWonNodeUri(),
            WonNodeEvent.STATUS.CONNECTED_TO_WON_NODE, connection.getWonNodeInfo()));
}

/**
 * Publishes the given event via the pub sub mediator, using the event's class name as topic.
 *
 * @param outgoing the event to publish
 */
private void publishWonNodeEvent(final WonNodeEvent outgoing) {
    pubSubMediator.tell(new DistributedPubSubMediator.Publish(outgoing.getClass().getName(), outgoing), getSelf());
}
use of won.matcher.service.common.event.HintEvent in project webofneeds by researchstudio-sat.
the class RescalMatcherActor method executeRescalAlgorithm.
/**
 * Loads the need and connection data from the sparql endpoint, preprocesses the data and writes it to the
 * execution directory, where it is processed by the rescal python algorithm that produces hints. The hints
 * are then loaded and published to the event bus.
 * <p>
 * {@code lastQueryDate} is only advanced when the whole run succeeds; every early return (not enough tensor
 * data, tensor smaller than the rank, python error code) leaves it untouched.
 *
 * @throws IOException if the python process cannot be started or its standard output cannot be read
 * @throws InterruptedException if the thread is interrupted while waiting for the python process
 */
private void executeRescalAlgorithm() throws IOException, InterruptedException {
    // load the needs and connections from the rdf store
    log.info("start processing (every {} minutes) ...", config.getExecutionDuration());
    long queryDate = System.currentTimeMillis();
    log.info("query needs and connections from rdf store '{}' from date '{}' to date '{}'", config.getSparqlEndpoint(), lastQueryDate, queryDate);
    // add the attributes of the needs to the rescal tensor
    TensorEntryAllGenerator tensorEntryAllGenerator = new TensorEntryAllGenerator("queries/attribute", config.getSparqlEndpoint(), lastQueryDate, queryDate);
    TensorEntryTokenizer tokenizer = new TensorEntryTokenizer(tensorEntryAllGenerator.generateTensorEntries());
    Collection<TensorEntry> tensorEntries = tokenizer.generateTensorEntries();
    for (TensorEntry entry : tensorEntries) {
        rescalInputData.addNeedAttribute(entry);
    }
    // add the connections between the needs to the rescal tensor
    tensorEntryAllGenerator = new TensorEntryAllGenerator("queries/connection", config.getSparqlEndpoint(), lastQueryDate, queryDate);
    tensorEntries = tensorEntryAllGenerator.generateTensorEntries();
    for (TensorEntry entry : tensorEntries) {
        rescalInputData.addNeedConnection(entry.getNeedUri(), entry.getValue(), true);
    }
    log.info("number of needs in tensor: {}", rescalInputData.getNeeds().size());
    log.info("number of attributes in tensor: {}", rescalInputData.getAttributes().size());
    log.info("number of connections in tensor: {}", rescalInputData.getNumberOfConnections());
    log.info("number of slices in tensor: {}", rescalInputData.getSlices().size());
    if (!rescalInputData.isValidTensor()) {
        log.info("not enough tensor data available for execution yet, wait for next execution!");
        return;
    }
    // write the files for rescal algorithm
    log.info("write rescal input data to folder: {}", config.getExecutionDirectory());
    TensorMatchingData cleanedTensorData = rescalInputData.writeCleanedOutputFiles(config.getExecutionDirectory());
    int tensorSize = cleanedTensorData.getTensorDimensions()[0];
    if (rescalInputData.getNeeds().size() + rescalInputData.getAttributes().size() < config.getRescalRank()) {
        log.info("Do not start rescal algorithm since tensor size (number of needs + number of attributes) = {} is smaller than rank parameter {}.", tensorSize, config.getRescalRank());
        return;
    }
    // execute the rescal algorithm in python; every argument is passed as its own element so that
    // directories containing whitespace are not split into several arguments
    String executionDirectory = String.valueOf(config.getExecutionDirectory());
    String[] command = {
        "python", config.getPythonScriptDirectory() + "/rescal-matcher.py",
        "-inputfolder", executionDirectory,
        "-outputfolder", executionDirectory + "/output",
        "-rank", String.valueOf(config.getRescalRank()),
        "-threshold", String.valueOf(config.getRescalThreshold()) };
    log.info("execute python script: " + String.join(" ", command));
    final Process pythonProcess = new ProcessBuilder(command).start();
    // Drain stderr concurrently: reading stdout to its end before touching stderr can block forever
    // once the process has filled the stderr pipe buffer. The lines are only collected here and are
    // logged by the calling thread afterwards.
    final java.util.List<String> errorLines = java.util.Collections.synchronizedList(new java.util.ArrayList<String>());
    Thread errorDrainer = new Thread(() -> {
        try (BufferedReader err = new BufferedReader(new InputStreamReader(pythonProcess.getErrorStream()))) {
            String errorLine;
            while ((errorLine = err.readLine()) != null) {
                errorLines.add(errorLine);
            }
        } catch (IOException e) {
            errorLines.add("could not read error output of python process: " + e);
        }
    });
    errorDrainer.setDaemon(true);
    errorDrainer.start();
    try (BufferedReader in = new BufferedReader(new InputStreamReader(pythonProcess.getInputStream()))) {
        String line;
        while ((line = in.readLine()) != null) {
            log.info(line);
        }
    }
    errorDrainer.join();
    for (String errorLine : errorLines) {
        log.warning(errorLine);
    }
    int returnCode = pythonProcess.waitFor();
    if (returnCode != 0) {
        log.error("rescal python call returned error code: " + returnCode);
        return;
    }
    // load the predicted hints and send them to the event bus of the matching service
    BulkHintEvent hintsEvent = hintReader.readHints(rescalInputData);
    int numHints = (hintsEvent == null || hintsEvent.getHintEvents() == null) ? 0 : hintsEvent.getHintEvents().size();
    log.info("loaded {} hints into bulk hint event and publish", numHints);
    if (numHints > 0) {
        StringBuilder builder = new StringBuilder();
        for (HintEvent hint : hintsEvent.getHintEvents()) {
            builder.append("\n- ").append(hint);
        }
        log.info(builder.toString());
        pubSubMediator.tell(new DistributedPubSubMediator.Publish(hintsEvent.getClass().getName(), hintsEvent), getSelf());
    }
    lastQueryDate = queryDate;
}
use of won.matcher.service.common.event.HintEvent in project webofneeds by researchstudio-sat.
the class HintBuilder method generateHintsFromSearchResult.
/**
 * Builds the hints for a need from the documents returned by a Solr query.
 * <p>
 * For every matched document up to two hints are created: one for the need itself and one for the matched
 * need. Whether a hint is suppressed is decided separately for each matched document, based on the
 * {@code NO_HINT_FOR_ME} / {@code NO_HINT_FOR_COUNTERPART} flags of both needs, the overlap of their
 * matching contexts and the two suppress parameters.
 *
 * @param docs the documents returned by the query, may be {@code null}
 * @param need the need the query was executed for
 * @param needModelWrapper wrapper used to read the flags and matching contexts of {@code need}
 * @param doSuppressHintForNeed if {@code true}, no hint is created for {@code need}
 * @param doSuppressHintForMatchedNeeds if {@code true}, no hint is created for any matched need
 * @param kneeDetection if {@code true}, {@code calculateMatchingResults} is applied to {@code docs} first
 * @return the bulk event holding all generated hints; empty if there is nothing to hint
 */
public BulkHintEvent generateHintsFromSearchResult(final SolrDocumentList docs, final NeedEvent need, NeedModelWrapper needModelWrapper, boolean doSuppressHintForNeed, boolean doSuppressHintForMatchedNeeds, boolean kneeDetection) {
    // check if knee detection should be performed
    SolrDocumentList newDocs = docs;
    if (kneeDetection) {
        newDocs = calculateMatchingResults(docs);
    }
    BulkHintEvent bulkHintEvent = new BulkHintEvent();
    log.info("Received {} matches as query result for need {}, keeping the top {} ", new Object[] { (docs != null) ? docs.size() : 0, need, (newDocs != null) ? newDocs.size() : 0 });
    if (newDocs == null) {
        // nothing to iterate over, so there are no hints to generate
        return bulkHintEvent;
    }
    boolean noHintForMe = needModelWrapper.hasFlag(WON.NO_HINT_FOR_ME);
    boolean noHintForCounterpart = needModelWrapper.hasFlag(WON.NO_HINT_FOR_COUNTERPART);
    log.debug("need to be matched has NoHintForMe: {}, NoHintForCounterpart: {} ", noHintForMe, noHintForCounterpart);
    // the matching contexts of the need do not depend on the matched document
    Collection<String> matchingContexts = needModelWrapper.getMatchingContexts();
    if (matchingContexts == null) {
        matchingContexts = new LinkedList<>();
    }
    for (SolrDocument doc : newDocs) {
        // NOTE: not the whole document is loaded here. The fields that are selected are defined
        // in won.matcher.solr.query.DefaultMatcherQueryExecuter - if additional fields are required, the field list
        // has to be extended in that class.
        Object idValue = doc.getFieldValue("id");
        if (idValue == null) {
            log.debug("omitting matched need: could not extract need URI");
            continue;
        }
        String matchedNeedUri = idValue.toString();
        List<String> flags = getValueList(doc, HAS_FLAG_SOLR_FIELD);
        boolean matchedNeedNoHintForMe = flags.contains(WON.NO_HINT_FOR_ME.toString());
        boolean matchedNeedNoHintForCounterpart = flags.contains(WON.NO_HINT_FOR_COUNTERPART.toString());
        // check the matching contexts of the two needs that are supposed to be matched
        // send only hints to needs if their matching contexts overlap (if one need has empty matching context it always receives hints)
        Collection<Object> contextSolrFieldValues = doc.getFieldValues(MatchingContextQueryFactory.MATCHING_CONTEXT_SOLR_FIELD);
        Collection<String> matchedNeedMatchingContexts = new LinkedList<>();
        if (contextSolrFieldValues != null) {
            matchedNeedMatchingContexts = contextSolrFieldValues.stream().map(a -> (String) a).collect(Collectors.toList());
        }
        boolean contextOverlap = CollectionUtils.intersection(matchedNeedMatchingContexts, matchingContexts).size() > 0;
        boolean suppressHintsForMyContexts = !contextOverlap && !(CollectionUtils.isEmpty(matchingContexts));
        boolean suppressHintsForCounterpartContexts = !contextOverlap && !(CollectionUtils.isEmpty(matchedNeedMatchingContexts));
        // The decisions are kept in per-document locals. Writing them back into the method parameters
        // would make a suppression caused by one matched need apply to all following matched needs.
        // suppress hints for current if its flags or its counterparts flags say so or if it was specified in the calling parameters or matching contexts
        boolean suppressHintForNeed = noHintForMe || matchedNeedNoHintForCounterpart || doSuppressHintForNeed || suppressHintsForMyContexts;
        // suppress hints for matched need if its flags or its counterparts flags say so or if it was specified in the calling parameters or matching contexts
        boolean suppressHintForMatchedNeed = noHintForCounterpart || matchedNeedNoHintForMe || doSuppressHintForMatchedNeeds || suppressHintsForCounterpartContexts;
        if (log.isDebugEnabled()) {
            log.debug("matched need has NoHintForMe: {}, NoHintForCounterpart: {}", matchedNeedNoHintForMe, matchedNeedNoHintForCounterpart);
            log.debug("need will receive a hint: {} (uri: {})", !suppressHintForNeed, need.getUri());
            log.debug("matched need need will receive a hint: {} (uri: {})", !suppressHintForMatchedNeed, matchedNeedUri);
            log.debug("need matching contexts: {}", matchingContexts);
            log.debug("matched need matching contexts: {}", matchedNeedMatchingContexts);
        }
        if (suppressHintForNeed && suppressHintForMatchedNeed) {
            log.debug("no hints to be sent because of Suppress settings");
            continue;
        }
        // wonNodeUri can be returned as either a String or ArrayList, not sure on what this depends
        String wonNodeUri = getFieldValueFirstOfListIfNecessary(doc, WON_NODE_SOLR_FIELD);
        if (wonNodeUri == null) {
            log.debug("omitting matched need {}: could not extract WoN node URI", matchedNeedUri);
            continue;
        }
        // normalize the final score and clamp it to [0, 1]
        double score = Double.parseDouble(doc.getFieldValue("score").toString()) * config.getScoreNormalizationFactor();
        score = Math.max(0, Math.min(1, score));
        log.debug("generate hint for match {} with normalized score {}", matchedNeedUri, score);
        if (!suppressHintForNeed) {
            bulkHintEvent.addHintEvent(new HintEvent(need.getWonNodeUri(), need.getUri(), wonNodeUri, matchedNeedUri, config.getSolrServerPublicUri(), score));
        }
        // also send the same hints to the other side (remote need and wonnode)?
        if (!suppressHintForMatchedNeed) {
            bulkHintEvent.addHintEvent(new HintEvent(wonNodeUri, matchedNeedUri, need.getWonNodeUri(), need.getUri(), config.getSolrServerPublicUri(), score));
        }
    }
    return bulkHintEvent;
}
use of won.matcher.service.common.event.HintEvent in project webofneeds by researchstudio-sat.
the class HintProducerProtocolActor method onTransformOutgoingMessage.
/**
 * Converts an outgoing hint event into the camel message that is sent to the won node.
 * <p>
 * The hint's need URIs, score and matcher URI are put into the message headers together with the
 * method name {@code "hint"}; the body is the hint's won message encoded as TriG. The
 * {@code "content"} and {@code "remoteBrokerEndpoint"} headers are currently not set. As a last
 * step the need-hint stopwatch of the monitoring service is stopped for the hint's from-need URI.
 *
 * @param message the outgoing message, supposed to be a {@link HintEvent}
 * @return the {@link CamelMessage} carrying the encoded hint and its headers
 */
@Override
public Object onTransformOutgoingMessage(Object message) {
    final HintEvent hintEvent = (HintEvent) message;

    final Map<String, Object> camelHeaders = new HashMap<>();
    camelHeaders.put("needURI", hintEvent.getFromNeedUri());
    camelHeaders.put("otherNeedURI", hintEvent.getToNeedUri());
    camelHeaders.put("score", String.valueOf(hintEvent.getScore()));
    camelHeaders.put("originator", hintEvent.getMatcherUri());
    camelHeaders.put("methodName", "hint");

    final WonMessage hintMessage = createHintWonMessage(hintEvent);
    final CamelMessage outgoing = new CamelMessage(WonMessageEncoder.encode(hintMessage, Lang.TRIG), camelHeaders);

    // monitoring code
    monitoringService.stopClock(MonitoringService.NEED_HINT_STOPWATCH, hintEvent.getFromNeedUri());
    log.debug("Send hint camel message {}", hintEvent.getFromNeedUri());
    return outgoing;
}
Aggregations