use of won.matcher.service.common.event.BulkHintEvent in project webofneeds by researchstudio-sat.
the class WonNodeControllerActor method onReceive.
/**
 * Dispatches every message delivered to this actor.
 * <ul>
 * <li>{@code Terminated} messages are forwarded to {@code handleConnectionErrors}.</li>
 * <li>The {@code LIFE_CHECK_TICK} message triggers {@code lifeCheck}.</li>
 * <li>{@code WonNodeEvent}s that announce or request a won node are answered with a
 * connected/skip event, or silently dropped when the node is known to have failed.</li>
 * <li>{@code HintEvent}s and {@code BulkHintEvent}s are passed to {@code processHint}.</li>
 * </ul>
 * Everything else (including won node events with any other status) is reported as unhandled.
 *
 * @param message the message delivered to this actor
 */
@Override
public void onReceive(final Object message) {
    if (message instanceof Terminated) {
        handleConnectionErrors((Terminated) message);
        return;
    }
    if (message.equals(LIFE_CHECK_TICK)) {
        lifeCheck();
        return;
    }
    // won node events with an unrelated status fall through to the checks below
    if (message instanceof WonNodeEvent && handleWonNodeEvent((WonNodeEvent) message)) {
        return;
    }
    // send back hints to won nodes
    if (message instanceof HintEvent) {
        processHint((HintEvent) message);
        return;
    }
    if (message instanceof BulkHintEvent) {
        for (HintEvent singleHint : ((BulkHintEvent) message).getHintEvents()) {
            processHint(singleHint);
        }
        return;
    }
    unhandled(message);
}

/**
 * Processes a won node event whose status is one of NEW_WON_NODE_DISCOVERED,
 * GET_WON_NODE_INFO_FOR_CRAWLING or RETRY_REGISTER_FAILED_WON_NODE.
 *
 * @param event the received won node event
 * @return {@code true} if the event had one of the statuses above and was processed,
 *         {@code false} if the status is not handled here
 */
private boolean handleWonNodeEvent(final WonNodeEvent event) {
    final WonNodeEvent.STATUS status = event.getStatus();
    final boolean infoRequested = status.equals(WonNodeEvent.STATUS.GET_WON_NODE_INFO_FOR_CRAWLING);
    final boolean retryRegistration = status.equals(WonNodeEvent.STATUS.RETRY_REGISTER_FAILED_WON_NODE);
    final boolean relevant = status.equals(WonNodeEvent.STATUS.NEW_WON_NODE_DISCOVERED)
            || infoRequested
            || retryRegistration;
    if (!relevant) {
        return false;
    }

    // won node has already been discovered and connected
    if (crawlWonNodes.containsKey(event.getWonNodeUri())) {
        log.debug("Won node uri '{}' already discovered", event.getWonNodeUri());
        if (infoRequested) {
            WonNodeInfo knownInfo = crawlWonNodes.get(event.getWonNodeUri()).getWonNodeInfo();
            publishWonNodeEvent(new WonNodeEvent(event.getWonNodeUri(),
                    WonNodeEvent.STATUS.CONNECTED_TO_WON_NODE, knownInfo));
        }
        return true;
    }

    // skip crawling of won nodes in the skip list
    if (skipWonNodeUris.contains(event.getWonNodeUri())) {
        log.debug("Skip crawling won node with uri '{}'", event.getWonNodeUri());
        publishWonNodeEvent(new WonNodeEvent(event.getWonNodeUri(), WonNodeEvent.STATUS.SKIP_WON_NODE));
        return true;
    }

    // do not try to connect again if the won node has failed already
    if (failedWonNodeUris.contains(event.getWonNodeUri())) {
        log.debug("Suppress connection to already failed won node with uri {} , will try to connect later ...", event.getWonNodeUri());
        return true;
    }

    // try to connect to the won node
    WonNodeConnection connection = addWonNodeForCrawling(event.getWonNodeUri(), retryRegistration);

    // the connection attempt itself may have put the node on the failed list
    if (failedWonNodeUris.contains(event.getWonNodeUri())) {
        log.debug("Still could not connect to won node with uri: {}, will retry later ...", event.getWonNodeUri());
        return true;
    }

    if (connection == null || connection.getWonNodeInfo() == null) {
        log.error("Cannot retrieve won node info from won node connection!");
        return true;
    }

    // tell the crawler about the discovered won node
    publishWonNodeEvent(new WonNodeEvent(event.getWonNodeUri(),
            WonNodeEvent.STATUS.CONNECTED_TO_WON_NODE, connection.getWonNodeInfo()));
    return true;
}

/**
 * Publishes the given event through the pub/sub mediator, using the event's class name as topic.
 *
 * @param outgoing the event to publish
 */
private void publishWonNodeEvent(final WonNodeEvent outgoing) {
    pubSubMediator.tell(new DistributedPubSubMediator.Publish(outgoing.getClass().getName(), outgoing), getSelf());
}
use of won.matcher.service.common.event.BulkHintEvent in project webofneeds by researchstudio-sat.
the class RescalMatcherActor method executeRescalAlgorithm.
/**
 * Loads the need and connection data from the sparql endpoint, preprocesses the data and writes it
 * to the execution directory to be processed by the rescal python script that produces hints.
 * The hints are then loaded and published to the event bus.
 * <p>
 * The method returns early (without advancing {@code lastQueryDate}) when the tensor is not valid
 * yet, when it is smaller than the configured rank, or when the python script exits with a
 * non-zero return code.
 *
 * @throws IOException if the python process cannot be started or its output cannot be read
 * @throws InterruptedException if the thread is interrupted while waiting for the python process
 */
private void executeRescalAlgorithm() throws IOException, InterruptedException {
    // load the needs and connections from the rdf store
    log.info("start processing (every {} minutes) ...", config.getExecutionDuration());
    long queryDate = System.currentTimeMillis();
    log.info("query needs and connections from rdf store '{}' from date '{}' to date '{}'", config.getSparqlEndpoint(), lastQueryDate, queryDate);

    // add the attributes of the needs to the rescal tensor
    TensorEntryAllGenerator tensorEntryAllGenerator = new TensorEntryAllGenerator("queries/attribute", config.getSparqlEndpoint(), lastQueryDate, queryDate);
    TensorEntryTokenizer tokenizer = new TensorEntryTokenizer(tensorEntryAllGenerator.generateTensorEntries());
    Collection<TensorEntry> tensorEntries = tokenizer.generateTensorEntries();
    for (TensorEntry entry : tensorEntries) {
        rescalInputData.addNeedAttribute(entry);
    }

    // add the connections between the needs to the rescal tensor
    tensorEntryAllGenerator = new TensorEntryAllGenerator("queries/connection", config.getSparqlEndpoint(), lastQueryDate, queryDate);
    tensorEntries = tensorEntryAllGenerator.generateTensorEntries();
    for (TensorEntry entry : tensorEntries) {
        rescalInputData.addNeedConnection(entry.getNeedUri(), entry.getValue(), true);
    }

    log.info("number of needs in tensor: {}", rescalInputData.getNeeds().size());
    log.info("number of attributes in tensor: {}", rescalInputData.getAttributes().size());
    log.info("number of connections in tensor: {}", rescalInputData.getNumberOfConnections());
    log.info("number of slices in tensor: {}", rescalInputData.getSlices().size());
    if (!rescalInputData.isValidTensor()) {
        log.info("not enough tensor data available for execution yet, wait for next execution!");
        return;
    }

    // write the files for rescal algorithm
    log.info("write rescal input data to folder: {}", config.getExecutionDirectory());
    TensorMatchingData cleanedTensorData = rescalInputData.writeCleanedOutputFiles(config.getExecutionDirectory());
    int tensorSize = cleanedTensorData.getTensorDimensions()[0];
    // NOTE(review): the check uses the uncleaned input sizes while the log reports the cleaned
    // tensor dimension - confirm which of the two is intended
    if (rescalInputData.getNeeds().size() + rescalInputData.getAttributes().size() < config.getRescalRank()) {
        log.info("Do not start rescal algorithm since tensor size (number of needs + number of attributes) = {} is " + "smaller than rank parameter {}.", tensorSize, config.getRescalRank());
        return;
    }

    // execute the rescal algorithm in python; every argument is passed as its own array element so
    // that directories containing whitespace are not split into several arguments
    String[] pythonCommand = {
        "python",
        config.getPythonScriptDirectory() + "/rescal-matcher.py",
        "-inputfolder", String.valueOf(config.getExecutionDirectory()),
        "-outputfolder", config.getExecutionDirectory() + "/output",
        "-rank", String.valueOf(config.getRescalRank()),
        "-threshold", String.valueOf(config.getRescalThreshold())
    };
    log.info("execute python script: " + String.join(" ", pythonCommand));
    Process pythonProcess = new ProcessBuilder(pythonCommand).start();

    // NOTE(review): stdout is drained completely before stderr; a script that writes a large amount
    // to stderr while stdout is still open could block - consider draining both concurrently
    try (BufferedReader stdout = new BufferedReader(new InputStreamReader(pythonProcess.getInputStream()))) {
        String line;
        while ((line = stdout.readLine()) != null) {
            log.info(line);
        }
    }
    try (BufferedReader stderr = new BufferedReader(new InputStreamReader(pythonProcess.getErrorStream()))) {
        String line;
        while ((line = stderr.readLine()) != null) {
            log.warning(line);
        }
    }

    int returnCode = pythonProcess.waitFor();
    if (returnCode != 0) {
        log.error("rescal python call returned error code: " + returnCode);
        return;
    }

    // load the predicted hints and send them to the event bus of the matching service
    BulkHintEvent hintsEvent = hintReader.readHints(rescalInputData);
    int numHints = (hintsEvent == null || hintsEvent.getHintEvents() == null) ? 0 : hintsEvent.getHintEvents().size();
    log.info("loaded {} hints into bulk hint event and publish", numHints);
    if (numHints > 0) {
        StringBuilder builder = new StringBuilder();
        for (HintEvent hint : hintsEvent.getHintEvents()) {
            builder.append("\n- ").append(hint);
        }
        log.info(builder.toString());
        pubSubMediator.tell(new DistributedPubSubMediator.Publish(hintsEvent.getClass().getName(), hintsEvent), getSelf());
    }

    // only a completed run advances the query window
    lastQueryDate = queryDate;
}
use of won.matcher.service.common.event.BulkHintEvent in project webofneeds by researchstudio-sat.
the class SolrMatcherActor method processActiveNeedEvent.
/**
 * Processes a newly active need: queries the Solr index for matching needs, sends the resulting
 * hints back to the sender of the event and finally adds the need to the index.
 * <p>
 * Up to three queries are executed with the same query string but different filters:
 * <ol>
 * <li>needs without NoHintForCounterpart: produces hints for the current need
 * (skipped if the current need has NoHintForMe)</li>
 * <li>needs without NoHintForMe that are not WhatsAround needs: produces hints for the matched
 * needs (skipped if the current need has NoHintForCounterpart)</li>
 * <li>needs without NoHintForMe that are WhatsAround needs: produces hints for the matched
 * needs without knee detection (skipped if the current need has NoHintForCounterpart)</li>
 * </ol>
 * A need that carries both NoHintForMe and NoHintForCounterpart is discarded without being
 * queried or indexed.
 *
 * @param needEvent event describing the active need
 * @throws IOException declared by the dataset deserialization, query and indexing calls
 * @throws SolrServerException declared by the Solr query/indexing calls
 * @throws JsonLdError declared by the indexing call
 */
protected void processActiveNeedEvent(NeedEvent needEvent) throws IOException, SolrServerException, JsonLdError {
    log.info("Start processing active need event {}", needEvent);

    Dataset dataset = needEvent.deserializeNeedDataset();
    NeedModelWrapper needModelWrapper = new NeedModelWrapper(dataset);
    boolean noHintForMe = needModelWrapper.hasFlag(WON.NO_HINT_FOR_ME);
    boolean noHintForCounterpart = needModelWrapper.hasFlag(WON.NO_HINT_FOR_COUNTERPART);

    // a need that neither wants hints nor wants to be hinted to others is not used for querying or indexing
    if (noHintForMe && noHintForCounterpart) {
        log.info("Discarding received need due to flags won:NoHintForMe and won:NoHintForCounterpart: {}", needEvent);
        return;
    }

    // check if need is usedForTesting only
    boolean usedForTesting = needModelWrapper.hasFlag(WON.USED_FOR_TESTING);
    SolrMatcherQueryExecutor queryExecutor = (usedForTesting ? testQueryExecuter : defaultQueryExecuter);

    // create another query depending if the current need is "WhatsAround" or a default need
    boolean whatsAround = needModelWrapper.hasFlag(WON.WHATS_AROUND);
    String queryString;
    if (whatsAround) {
        // WhatsAround doesnt match on terms only other needs in close location are boosted
        queryString = new WhatsAroundQueryFactory(dataset).createQuery();
    } else {
        // default query matches content terms (of fields title, description and tags) with different weights
        // and gives an additional multiplicative boost for geographically closer needs
        queryString = new DefaultNeedQueryFactory(dataset).createQuery();
    }

    boolean hasMatchingContexts = needModelWrapper.getMatchingContexts() != null
            && !needModelWrapper.getMatchingContexts().isEmpty();

    // Filters shared by all queries below:
    // - need status active
    // - creation date overlap 1 month
    // - OR-filtering for matching contexts if any were specified

    // case 1) needs without NoHintForCounterpart => hints for current need
    if (!noHintForMe) {
        List<String> filterQueries = new LinkedList<>();
        filterQueries.add(new NeedStateQueryFactory(dataset).createQuery());
        filterQueries.add(new CreationDateQueryFactory(dataset, 1, ChronoUnit.MONTHS).createQuery());
        filterQueries.add(new BooleanQueryFactory(BooleanQueryFactory.BooleanOperator.NOT, new HasFlagQueryFactory(HasFlagQueryFactory.FLAGS.NO_HINT_FOR_COUNTERPART)).createQuery());
        if (hasMatchingContexts) {
            filterQueries.add(new MatchingContextQueryFactory(needModelWrapper.getMatchingContexts()).createQuery());
        }

        log.info("query Solr endpoint {} for need {} and need list 1 (without NoHintForCounterpart)", config.getSolrEndpointUri(usedForTesting), needEvent.getUri());
        SolrDocumentList docs = queryExecutor.executeNeedQuery(queryString, null, filterQueries.toArray(new String[filterQueries.size()]));
        if (docs != null) {
            // only generate hints for current need, suppress hints for matched needs;
            // knee detection is performed unless the current need is WhatsAround
            BulkHintEvent events = hintBuilder.generateHintsFromSearchResult(docs, needEvent, needModelWrapper, false, true, !whatsAround);
            log.info("Create {} hints for need {} and need list 1 (without NoHintForCounterpart)", events.getHintEvents().size(), needEvent);
            // send the hints for the current need back to the sender of the need event
            if (events.getHintEvents().size() != 0) {
                getSender().tell(events, getSelf());
            }
        } else {
            log.warning("No results found for need list 1 (without NoHintForCounterpart) query of need {}", needEvent);
        }
    }

    // case 2) needs without NoHintForSelf, excluding WhatsAround needs => hints for needs in index that are not WhatsAround
    if (!noHintForCounterpart) {
        List<String> filterQueries = new LinkedList<>();
        filterQueries.add(new NeedStateQueryFactory(dataset).createQuery());
        filterQueries.add(new CreationDateQueryFactory(dataset, 1, ChronoUnit.MONTHS).createQuery());
        filterQueries.add(new BooleanQueryFactory(BooleanQueryFactory.BooleanOperator.NOT, new HasFlagQueryFactory(HasFlagQueryFactory.FLAGS.NO_HINT_FOR_ME)).createQuery());
        filterQueries.add(new BooleanQueryFactory(BooleanQueryFactory.BooleanOperator.NOT, new HasFlagQueryFactory(HasFlagQueryFactory.FLAGS.WHATS_AROUND)).createQuery());
        if (hasMatchingContexts) {
            filterQueries.add(new MatchingContextQueryFactory(needModelWrapper.getMatchingContexts()).createQuery());
        }

        log.info("query Solr endpoint {} for need {} and need list 2 (without NoHintForSelf, excluding WhatsAround needs)", config.getSolrEndpointUri(usedForTesting), needEvent.getUri());
        SolrDocumentList docs = queryExecutor.executeNeedQuery(queryString, null, filterQueries.toArray(new String[filterQueries.size()]));
        if (docs != null) {
            // suppress hints for current need, only generate hints for matched needs, perform knee detection
            BulkHintEvent events = hintBuilder.generateHintsFromSearchResult(docs, needEvent, needModelWrapper, true, false, true);
            log.info("Create {} hints for need {} and need list 2 (without NoHintForSelf, excluding WhatsAround needs)", events.getHintEvents().size(), needEvent);
            // send the hints for the matched needs back to the sender of the need event
            if (events.getHintEvents().size() != 0) {
                getSender().tell(events, getSelf());
            }
        } else {
            log.warning("No results found for need list 2 (without NoHintForSelf, excluding WhatsAround needs) query of need {}", needEvent);
        }
    }

    // case 3) needs without NoHintForSelf that are only WhatsAround needs => hints for needs in index that are WhatsAround
    if (!noHintForCounterpart) {
        List<String> filterQueries = new LinkedList<>();
        filterQueries.add(new NeedStateQueryFactory(dataset).createQuery());
        filterQueries.add(new CreationDateQueryFactory(dataset, 1, ChronoUnit.MONTHS).createQuery());
        filterQueries.add(new BooleanQueryFactory(BooleanQueryFactory.BooleanOperator.NOT, new HasFlagQueryFactory(HasFlagQueryFactory.FLAGS.NO_HINT_FOR_ME)).createQuery());
        filterQueries.add(new HasFlagQueryFactory(HasFlagQueryFactory.FLAGS.WHATS_AROUND).createQuery());
        if (hasMatchingContexts) {
            filterQueries.add(new MatchingContextQueryFactory(needModelWrapper.getMatchingContexts()).createQuery());
        }

        log.info("query Solr endpoint {} for need {} and need list 3 (without NoHintForSelf that are only WhatsAround needs)", config.getSolrEndpointUri(usedForTesting), needEvent.getUri());
        SolrDocumentList docs = queryExecutor.executeNeedQuery(queryString, null, filterQueries.toArray(new String[filterQueries.size()]));
        if (docs != null) {
            // suppress hints for current need, only generate hints for matched needs, do not perform knee detection
            BulkHintEvent events = hintBuilder.generateHintsFromSearchResult(docs, needEvent, needModelWrapper, true, false, false);
            log.info("Create {} hints for need {} and need list 3 (without NoHintForSelf that are only WhatsAround needs)", events.getHintEvents().size(), needEvent);
            // send the hints for the matched needs back to the sender of the need event
            if (events.getHintEvents().size() != 0) {
                getSender().tell(events, getSelf());
            }
        } else {
            log.warning("No results found for need list 3 (without NoHintForSelf that are only WhatsAround needs) query of need {}", needEvent);
        }
    }

    // index need
    log.info("Add need event content {} to solr index", needEvent);
    needIndexer.index(dataset);
}
use of won.matcher.service.common.event.BulkHintEvent in project webofneeds by researchstudio-sat.
the class HintBuilder method generateHintsFromSearchResult.
/**
 * Builds the hint events for a need from the documents returned by a Solr query.
 * <p>
 * For every document up to two hints are created: one for {@code need} and one for the matched
 * need. Each direction is suppressed individually, per document, if the corresponding parameter is
 * set, if the flags of either need forbid it, or if the matching contexts of the two needs do not
 * overlap and the receiving need specified matching contexts. Documents without an "id" or
 * without a won node URI are skipped. The Solr score is multiplied with the configured
 * normalization factor and clamped to the range [0, 1].
 *
 * @param docs query result; {@code null} is treated like an empty result
 * @param need the need event the query was executed for
 * @param needModelWrapper wrapper of the need, used to read its flags and matching contexts
 * @param doSuppressHintForNeed if {@code true} no hints are created for {@code need}
 * @param doSuppressHintForMatchedNeeds if {@code true} no hints are created for the matched needs
 * @param kneeDetection if {@code true} the result list is first reduced by
 *        {@code calculateMatchingResults}
 * @return bulk event holding all generated hints, never {@code null}, possibly empty
 */
public BulkHintEvent generateHintsFromSearchResult(final SolrDocumentList docs, final NeedEvent need, NeedModelWrapper needModelWrapper, boolean doSuppressHintForNeed, boolean doSuppressHintForMatchedNeeds, boolean kneeDetection) {
    BulkHintEvent bulkHintEvent = new BulkHintEvent();
    if (docs == null) {
        // nothing to match against; previously this ended in a NullPointerException
        log.info("Received {} matches as query result for need {}, keeping the top {} ", new Object[] { 0, need, 0 });
        return bulkHintEvent;
    }

    // check if knee detection should be performed
    SolrDocumentList newDocs = kneeDetection ? calculateMatchingResults(docs) : docs;
    log.info("Received {} matches as query result for need {}, keeping the top {} ", new Object[] { docs.size(), need, newDocs.size() });

    boolean noHintForMe = needModelWrapper.hasFlag(WON.NO_HINT_FOR_ME);
    boolean noHintForCounterpart = needModelWrapper.hasFlag(WON.NO_HINT_FOR_COUNTERPART);
    log.debug("need to be matched has NoHintForMe: {}, NoHintForCounterpart: {} ", noHintForMe, noHintForCounterpart);

    // the matching contexts of the querying need are the same for every document
    Collection<String> matchingContexts = needModelWrapper.getMatchingContexts();
    if (matchingContexts == null) {
        matchingContexts = new LinkedList<>();
    }

    for (SolrDocument doc : newDocs) {
        // NOTE: not the whole document is loaded here. The fields that are selected are defined
        // in won.matcher.solr.query.DefaultMatcherQueryExecuter - if additional fields are required, the field list
        // has to be extended in that class.
        Object idValue = doc.getFieldValue("id");
        if (idValue == null) {
            log.debug("omitting matched need: could not extract need URI");
            continue;
        }
        String matchedNeedUri = idValue.toString();

        List<String> flags = getValueList(doc, HAS_FLAG_SOLR_FIELD);
        boolean matchedNeedNoHintForMe = flags.contains(WON.NO_HINT_FOR_ME.toString());
        boolean matchedNeedNoHintForCounterpart = flags.contains(WON.NO_HINT_FOR_COUNTERPART.toString());

        // check the matching contexts of the two needs that are supposed to be matched
        // send only hints to needs if their matching contexts overlap (if one need has empty matching context it always receives hints)
        Collection<Object> contextSolrFieldValues = doc.getFieldValues(MatchingContextQueryFactory.MATCHING_CONTEXT_SOLR_FIELD);
        Collection<String> matchedNeedMatchingContexts = new LinkedList<>();
        if (contextSolrFieldValues != null) {
            matchedNeedMatchingContexts = contextSolrFieldValues.stream().map(a -> (String) a).collect(Collectors.toList());
        }
        boolean contextOverlap = CollectionUtils.intersection(matchedNeedMatchingContexts, matchingContexts).size() > 0;
        boolean suppressHintsForMyContexts = !contextOverlap && !(CollectionUtils.isEmpty(matchingContexts));
        boolean suppressHintsForCounterpartContexts = !contextOverlap && !(CollectionUtils.isEmpty(matchedNeedMatchingContexts));

        // The suppression decision is made per document in local variables. Writing it back into the
        // method parameters would carry a decision caused by one document over to all following ones.
        // suppress hints for current need if its flags or its counterparts flags say so or if it was specified in the calling parameters or matching contexts
        boolean suppressHintForNeed = doSuppressHintForNeed || noHintForMe || matchedNeedNoHintForCounterpart || suppressHintsForMyContexts;
        // suppress hints for matched need if its flags or its counterparts flags say so or if it was specified in the calling parameters or matching contexts
        boolean suppressHintForMatchedNeed = doSuppressHintForMatchedNeeds || noHintForCounterpart || matchedNeedNoHintForMe || suppressHintsForCounterpartContexts;

        if (log.isDebugEnabled()) {
            log.debug("matched need has NoHintForMe: {}, NoHintForCounterpart: {}", matchedNeedNoHintForMe, matchedNeedNoHintForCounterpart);
            log.debug("need will receive a hint: {} (uri: {})", !suppressHintForNeed, need.getUri());
            log.debug("matched need need will receive a hint: {} (uri: {})", !suppressHintForMatchedNeed, matchedNeedUri);
            log.debug("need matching contexts: {}", matchingContexts);
            log.debug("matched need matching contexts: {}", matchedNeedMatchingContexts);
        }
        if (suppressHintForNeed && suppressHintForMatchedNeed) {
            log.debug("no hints to be sent because of Suppress settings");
            continue;
        }

        // wonNodeUri can be returned as either a String or ArrayList, not sure on what this depends
        String wonNodeUri = getFieldValueFirstOfListIfNecessary(doc, WON_NODE_SOLR_FIELD);
        if (wonNodeUri == null) {
            log.debug("omitting matched need {}: could not extract WoN node URI", matchedNeedUri);
            continue;
        }

        // normalize the final score and clamp it to [0, 1]
        double score = Double.parseDouble(doc.getFieldValue("score").toString()) * config.getScoreNormalizationFactor();
        score = Math.max(0, Math.min(1, score));
        log.debug("generate hint for match {} with normalized score {}", matchedNeedUri, score);

        if (!suppressHintForNeed) {
            bulkHintEvent.addHintEvent(new HintEvent(need.getWonNodeUri(), need.getUri(), wonNodeUri, matchedNeedUri, config.getSolrServerPublicUri(), score));
        }
        // also send the same hints to the other side (remote need and wonnode)?
        if (!suppressHintForMatchedNeed) {
            bulkHintEvent.addHintEvent(new HintEvent(wonNodeUri, matchedNeedUri, need.getWonNodeUri(), need.getUri(), config.getSolrServerPublicUri(), score));
        }
    }
    return bulkHintEvent;
}
Aggregations