Search in sources :

Example 1 with TensorMatchingData

use of won.matcher.utils.tensor.TensorMatchingData in project webofneeds by researchstudio-sat.

the class RescalMatcherActor method executeRescalAlgorithm.

/**
 * Load the need and connection data from the sparql endpoint, preprocess the data and write it to some directory
 * to be processed by the rescal python algorithm that produces hints. The hints are then loaded and sent to
 * the event bus.
 * <p>
 * {@code lastQueryDate} is only advanced at the very end of the method. Every early return (invalid tensor,
 * tensor smaller than the rank, non-zero exit code of the python script) leaves it untouched, so the next
 * execution queries the same time range again.
 *
 * @throws IOException if the python process cannot be started or its output cannot be read (helpers called
 *         from here may raise it as well)
 * @throws InterruptedException if the current thread is interrupted while waiting for the python process
 */
private void executeRescalAlgorithm() throws IOException, InterruptedException {
    // load the needs and connections from the rdf store
    log.info("start processing (every {} minutes) ...", config.getExecutionDuration());
    long queryDate = System.currentTimeMillis();
    log.info("query needs and connections from rdf store '{}' from date '{}' to date '{}'", config.getSparqlEndpoint(), lastQueryDate, queryDate);
    // add the attributes of the needs to the rescal tensor
    TensorEntryAllGenerator tensorEntryAllGenerator = new TensorEntryAllGenerator("queries/attribute", config.getSparqlEndpoint(), lastQueryDate, queryDate);
    TensorEntryTokenizer tokenizer = new TensorEntryTokenizer(tensorEntryAllGenerator.generateTensorEntries());
    Collection<TensorEntry> tensorEntries = tokenizer.generateTensorEntries();
    for (TensorEntry entry : tensorEntries) {
        rescalInputData.addNeedAttribute(entry);
    }
    // add the connections between the needs to the rescal tensor
    tensorEntryAllGenerator = new TensorEntryAllGenerator("queries/connection", config.getSparqlEndpoint(), lastQueryDate, queryDate);
    tensorEntries = tensorEntryAllGenerator.generateTensorEntries();
    for (TensorEntry entry : tensorEntries) {
        rescalInputData.addNeedConnection(entry.getNeedUri(), entry.getValue(), true);
    }
    log.info("number of needs in tensor: {}", rescalInputData.getNeeds().size());
    log.info("number of attributes in tensor: {}", rescalInputData.getAttributes().size());
    log.info("number of connections in tensor: {}", rescalInputData.getNumberOfConnections());
    log.info("number of slices in tensor: {}", rescalInputData.getSlices().size());
    if (!rescalInputData.isValidTensor()) {
        log.info("not enough tensor data available for execution yet, wait for next execution!");
        return;
    }
    // write the files for rescal algorithm
    log.info("write rescal input data to folder: {}", config.getExecutionDirectory());
    TensorMatchingData cleanedTensorData = rescalInputData.writeCleanedOutputFiles(config.getExecutionDirectory());
    int tensorSize = cleanedTensorData.getTensorDimensions()[0];
    // NOTE(review): the check below uses the sizes of the uncleaned input data, while the logged tensorSize
    // comes from the cleaned tensor. The two values may differ - confirm which one is intended.
    if (rescalInputData.getNeeds().size() + rescalInputData.getAttributes().size() < config.getRescalRank()) {
        log.info("Do not start rescal algorithm since tensor size (number of needs + number of attributes) = {} is " + "smaller than rank parameter {}.", tensorSize, config.getRescalRank());
        return;
    }
    // execute the rescal algorithm in python
    int returnCode = runRescalPythonScript();
    if (returnCode != 0) {
        log.error("rescal python call returned error code: " + returnCode);
        return;
    }
    // load the predicted hints and send them to the event bus of the matching service
    BulkHintEvent hintsEvent = hintReader.readHints(rescalInputData);
    int numHints = (hintsEvent == null || hintsEvent.getHintEvents() == null) ? 0 : hintsEvent.getHintEvents().size();
    log.info("loaded {} hints into bulk hint event and publish", numHints);
    if (numHints > 0) {
        StringBuilder builder = new StringBuilder();
        for (HintEvent hint : hintsEvent.getHintEvents()) {
            builder.append("\n- ").append(hint);
        }
        log.info(builder.toString());
        pubSubMediator.tell(new DistributedPubSubMediator.Publish(hintsEvent.getClass().getName(), hintsEvent), getSelf());
    }
    lastQueryDate = queryDate;
}

/**
 * Runs the rescal python script on the configured execution directory and forwards its output to the log:
 * stdout lines at info level while the script runs, stderr lines at warning level once the stream is drained.
 * <p>
 * The arguments are passed as separate tokens, so directories containing whitespace are handled correctly.
 * stderr is drained on a helper thread while stdout is read on the calling thread; reading the two pipes one
 * after the other can block forever once the script fills the pipe that is not being read.
 *
 * @return the exit code of the python process
 * @throws IOException if the process cannot be started or its stdout cannot be read
 * @throws InterruptedException if the current thread is interrupted while waiting for the process
 */
private int runRescalPythonScript() throws IOException, InterruptedException {
    String[] command = {
        "python",
        config.getPythonScriptDirectory() + "/rescal-matcher.py",
        "-inputfolder", String.valueOf(config.getExecutionDirectory()),
        "-outputfolder", config.getExecutionDirectory() + "/output",
        "-rank", String.valueOf(config.getRescalRank()),
        "-threshold", String.valueOf(config.getRescalThreshold())
    };
    // space separated rendering of the command, used for logging only
    StringBuilder pythonCall = new StringBuilder();
    for (String token : command) {
        if (pythonCall.length() > 0) {
            pythonCall.append(' ');
        }
        pythonCall.append(token);
    }
    log.info("execute python script: {}", pythonCall.toString());
    final Process pythonProcess = new ProcessBuilder(command).start();
    try {
        // only read by the calling thread after join(), which makes the drainer's writes visible
        final java.util.List<String> stderrLines = new java.util.ArrayList<String>();
        Thread stderrDrainer = new Thread(new Runnable() {
            @Override
            public void run() {
                try (BufferedReader err = new BufferedReader(new InputStreamReader(pythonProcess.getErrorStream()))) {
                    String errLine;
                    while ((errLine = err.readLine()) != null) {
                        stderrLines.add(errLine);
                    }
                } catch (IOException e) {
                    stderrLines.add("could not read error output of rescal python process: " + e);
                }
            }
        }, "rescal-python-stderr");
        stderrDrainer.setDaemon(true);
        stderrDrainer.start();
        try (BufferedReader in = new BufferedReader(new InputStreamReader(pythonProcess.getInputStream()))) {
            String line;
            while ((line = in.readLine()) != null) {
                log.info(line);
            }
        }
        stderrDrainer.join();
        for (String errLine : stderrLines) {
            log.warning(errLine);
        }
        return pythonProcess.waitFor();
    } finally {
        // no effect after a regular exit; stops the child if reading failed or the thread was interrupted
        pythonProcess.destroy();
    }
}
Also used : TensorMatchingData(won.matcher.utils.tensor.TensorMatchingData) BulkHintEvent(won.matcher.service.common.event.BulkHintEvent) InputStreamReader(java.io.InputStreamReader) DistributedPubSubMediator(akka.cluster.pubsub.DistributedPubSubMediator) HintEvent(won.matcher.service.common.event.HintEvent) BulkHintEvent(won.matcher.service.common.event.BulkHintEvent) TensorEntryTokenizer(won.matcher.utils.tensor.TensorEntryTokenizer) TensorEntryAllGenerator(won.matcher.utils.tensor.TensorEntryAllGenerator) TensorEntry(won.matcher.utils.tensor.TensorEntry) BufferedReader(java.io.BufferedReader)

Aggregations

DistributedPubSubMediator (akka.cluster.pubsub.DistributedPubSubMediator)1 BufferedReader (java.io.BufferedReader)1 InputStreamReader (java.io.InputStreamReader)1 BulkHintEvent (won.matcher.service.common.event.BulkHintEvent)1 HintEvent (won.matcher.service.common.event.HintEvent)1 TensorEntry (won.matcher.utils.tensor.TensorEntry)1 TensorEntryAllGenerator (won.matcher.utils.tensor.TensorEntryAllGenerator)1 TensorEntryTokenizer (won.matcher.utils.tensor.TensorEntryTokenizer)1 TensorMatchingData (won.matcher.utils.tensor.TensorMatchingData)1