Search in sources :

Example 1 with ConjunctiveExpression

Use of org.geneontology.minerva.legacy.sparql.GPADData.ConjunctiveExpression in project minerva by geneontology.

From the class GPADSPARQLExport, method getGPAD:

/**
 * Builds the GPAD records for a model from the facts held in a working memory.
 *
 * <p>This is a bit convoluted in order to minimize redundant queries, for performance reasons.
 * The steps are:
 * <ol>
 *   <li>Copy the working-memory facts into a Jena model and reject it if {@code isConsistent} fails.</li>
 *   <li>Run {@code mainQuery} to collect candidate ("basic") annotations.</li>
 *   <li>Ask the working memory to explain every candidate annotation triple and every possible
 *       extension triple, then look up the evidence for all facts used by those explanations.</li>
 *   <li>Emit one record per (annotation, explanation, evidence) combination that passes the
 *       regulation, root-term and root-MF filters applied below.</li>
 * </ol>
 *
 * @param wm       working memory whose facts are exported and whose explanations are consulted
 * @param modelIRI IRI of the model; converted with {@code curieHandler} to obtain the model ID
 * @return the GPAD records that survived filtering (possibly empty)
 * @throws InconsistentOntologyException if the model built from the facts is not consistent
 */
public Set<GPADData> getGPAD(WorkingMemory wm, IRI modelIRI) throws InconsistentOntologyException {
    Model model = ModelFactory.createDefaultModel();
    model.add(JavaConverters.setAsJavaSetConverter(wm.facts()).asJava().stream().map(t -> model.asStatement(Bridge.jenaFromTriple(t))).collect(Collectors.toList()));
    if (!isConsistent(model)) {
        throw new InconsistentOntologyException();
    }
    Map<String, String> modelLevelAnnotations = getModelAnnotations(model);
    // The model ID is derived from the supplied IRI rather than from the owl:Ontology resources
    // found in the model, because the latter is unpredictable if more than one is present.
    String modelID = curieHandler.getCuri(modelIRI);
    Set<GPADData> annotations = new HashSet<>();
    /* The first step of constructing GPAD records is to construct candidate/basic GPAD records by running gpad-basic.rq. */
    Set<BasicGPADData> basicAnnotations = new HashSet<>();
    // try-with-resources guarantees the query execution is released even if iteration fails.
    try (QueryExecution qe = QueryExecutionFactory.create(mainQuery, model)) {
        ResultSet results = qe.execSelect();
        while (results.hasNext()) {
            QuerySolution qs = results.next();
            BasicGPADData basicGPADData = new BasicGPADData(qs.getResource("pr").asNode(), IRI.create(qs.getResource("pr_type").getURI()), IRI.create(qs.getResource("rel").getURI()), qs.getResource("target").asNode(), IRI.create(qs.getResource("target_type").getURI()));
            /* See whether the query answer contains not-null blank nodes, which are only set if the matching subgraph
             * contains the property ComplementOf.  If we see such cases, we set the operator field as NOT so that NOT value
             * can be printed in GPAD. */
            if (qs.getResource("blank_comp") != null) {
                basicGPADData.setOperator(GPADOperatorStatus.NOT);
            }
            basicAnnotations.add(basicGPADData);
        }
    }
    /* The bindings of ?pr_type, ?rel, ?target_type are candidate mappings or values for the final GPAD records
     * (i.e. not every mapping is used for building the final records of GPAD file; many of them are filtered out later).
     * The mappings are
     * 		?pr_type: DB Object ID (2nd in GPAD), ?rel: Qualifier(3rd), ?target_type: GO ID(4th)
     * The rest of fields in GPAD are then constructed by joining the candidate mappings with mappings describing evidences and so on.
     * If the output of this exporter (i.e. GPAD files) does not contain the values you expect,
     * dump the above "QuerySolution qs" variable and see whether they are included in the dump. */
    Set<AnnotationExtension> possibleExtensions = possibleExtensions(basicAnnotations, model);
    // Collect every triple that needs an explanation up front so each is explained only once.
    Set<Triple> statementsToExplain = new HashSet<>();
    basicAnnotations.forEach(ba -> statementsToExplain.add(Triple.create(ba.getObjectNode(), NodeFactory.createURI(ba.getQualifier().toString()), ba.getOntologyClassNode())));
    possibleExtensions.forEach(ae -> statementsToExplain.add(ae.getTriple()));
    Map<Triple, Set<Explanation>> allExplanations = statementsToExplain.stream().collect(Collectors.toMap(Function.identity(), s -> toJava(wm.explain(Bridge.tripleFromJena(s)))));
    // Evidence is looked up in one call for the union of all facts appearing in any explanation.
    Map<Triple, Set<GPADEvidence>> allEvidences = evidencesForFacts(allExplanations.values().stream().flatMap(es -> es.stream()).flatMap(e -> toJava(e.facts()).stream().map(t -> Bridge.jenaFromTriple(t))).collect(toSet()), model, modelID, modelLevelAnnotations);
    // Objects that have a function-relation annotation to some class other than the MF constant.
    Set<IRI> gpsWithAnyMFNotRootMF = basicAnnotations.stream().filter(a -> functionRelations.contains(a.getQualifier().toString())).filter(a -> !a.getOntologyClass().toString().equals(MF)).map(a -> a.getObject()).collect(toSet());
    Map<Node, Set<IRI>> nodesToOntologyClasses = basicAnnotations.stream().collect(Collectors.groupingBy(BasicGPADData::getObjectNode, mapping(BasicGPADData::getOntologyClass, toSet())));
    for (BasicGPADData annotation : basicAnnotations) {
        // Skip an annotation whose class is listed in `regulators` for any class annotated on the same object node.
        Set<IRI> termsRegulatedByAnnotationsForThisGPNode = nodesToOntologyClasses.get(annotation.getObjectNode()).stream().flatMap(term -> regulators.getOrDefault(term, Collections.emptySet()).stream()).collect(toSet());
        boolean regulationViolation = termsRegulatedByAnnotationsForThisGPNode.contains(annotation.getOntologyClass());
        if (regulationViolation) {
            continue;
        }
        for (Explanation explanation : allExplanations.get(Triple.create(annotation.getObjectNode(), NodeFactory.createURI(annotation.getQualifier().toString()), annotation.getOntologyClassNode()))) {
            Set<Triple> requiredFacts = toJava(explanation.facts()).stream().map(t -> Bridge.jenaFromTriple(t)).collect(toSet());
            // Every statement in the explanation must have at least one evidence, unless the statement is a class assertion.
            // A fact with no entry in the evidence map is treated as having no evidence (same lookup style as below).
            if (requiredFacts.stream().filter(t -> !t.getPredicate().getURI().equals(RDF.type.getURI())).allMatch(f -> !allEvidences.getOrDefault(f, Collections.emptySet()).isEmpty())) {
                // The evidence used for the annotation must be on an edge to or from the target node
                Stream<GPADEvidence> annotationEvidences = requiredFacts.stream().filter(f -> (f.getSubject().equals(annotation.getOntologyClassNode()) || f.getObject().equals(annotation.getOntologyClassNode()))).flatMap(f -> allEvidences.getOrDefault(f, Collections.emptySet()).stream());
                annotationEvidences.forEach(currentEvidence -> {
                    String reference = currentEvidence.getReference();
                    // An extension is kept only if it hangs off the annotation's class node, does not point
                    // back at the annotated object, and has an explanation whose facts all carry evidence
                    // with the same reference as the current evidence.
                    Set<ConjunctiveExpression> goodExtensions = new HashSet<>();
                    for (AnnotationExtension extension : possibleExtensions) {
                        if (extension.getTriple().getSubject().equals(annotation.getOntologyClassNode()) && !(extension.getTriple().getObject().equals(annotation.getObjectNode()))) {
                            for (Explanation expl : allExplanations.get(extension.getTriple())) {
                                boolean allFactsOfExplanationHaveRefMatchingAnnotation = toJava(expl.facts()).stream().map(fact -> allEvidences.getOrDefault(Bridge.jenaFromTriple(fact), Collections.emptySet())).allMatch(evidenceSet -> evidenceSet.stream().anyMatch(ev -> ev.getReference().equals(reference)));
                                if (allFactsOfExplanationHaveRefMatchingAnnotation) {
                                    goodExtensions.add(new DefaultConjunctiveExpression(IRI.create(extension.getTriple().getPredicate().getURI()), extension.getValueType()));
                                }
                            }
                        }
                    }
                    // Handle special case of EMAPA; don't include Uberon extensions
                    final boolean isMouseExtension = goodExtensions.stream().anyMatch(e -> e.getFiller().toString().startsWith(EMAPA_NAMESPACE));
                    if (isMouseExtension) {
                        goodExtensions.removeIf(e -> e.getFiller().toString().startsWith(UBERON_NAMESPACE));
                    }
                    // An annotation to a root term is only allowed when its evidence equals ND.
                    final boolean rootViolation;
                    if (rootTerms.contains(annotation.getOntologyClass().toString())) {
                        rootViolation = !ND.equals(currentEvidence.getEvidence().toString());
                    } else {
                        rootViolation = false;
                    }
                    // Drop an annotation to MF itself when the same object also has a non-MF function annotation.
                    final boolean rootMFWithOtherMF = annotation.getOntologyClass().toString().equals(MF) && gpsWithAnyMFNotRootMF.contains(annotation.getObject());
                    if (!rootViolation && !rootMFWithOtherMF) {
                        DefaultGPADData defaultGPADData = new DefaultGPADData(annotation.getObject(), annotation.getQualifier(), annotation.getOntologyClass(), goodExtensions, reference, currentEvidence.getEvidence(), currentEvidence.getWithOrFrom(), Optional.empty(), currentEvidence.getModificationDate(), currentEvidence.getAssignedBy(), currentEvidence.getAnnotations());
                        defaultGPADData.setOperator(annotation.getOperator());
                        annotations.add(defaultGPADData);
                    }
                });
            }
        }
    }
    return annotations;
}
Also used : Binding(org.apache.jena.sparql.engine.binding.Binding) RDF(org.apache.jena.vocabulary.RDF) java.util(java.util) NodeFactory(org.apache.jena.graph.NodeFactory) BindingMap(org.apache.jena.sparql.engine.binding.BindingMap) Function(java.util.function.Function) Model(org.apache.jena.rdf.model.Model) Explanation(org.geneontology.rules.engine.Explanation) WorkingMemory(org.geneontology.rules.engine.WorkingMemory) Logger(org.apache.log4j.Logger) Pair(org.apache.commons.lang3.tuple.Pair) IRI(org.semanticweb.owlapi.model.IRI) Resource(org.apache.jena.rdf.model.Resource) Collectors.mapping(java.util.stream.Collectors.mapping) BindingFactory(org.apache.jena.sparql.engine.binding.BindingFactory) Collectors.toSet(java.util.stream.Collectors.toSet) InconsistentOntologyException(org.semanticweb.owlapi.reasoner.InconsistentOntologyException) org.apache.jena.query(org.apache.jena.query) ConjunctiveExpression(org.geneontology.minerva.legacy.sparql.GPADData.ConjunctiveExpression) Triple(org.apache.jena.graph.Triple) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) IOUtils(org.apache.commons.io.IOUtils) Stream(java.util.stream.Stream) Var(org.apache.jena.sparql.core.Var) Node(org.apache.jena.graph.Node) Literal(org.apache.jena.rdf.model.Literal) CurieHandler(org.geneontology.minerva.curie.CurieHandler) JavaConverters(scala.collection.JavaConverters) Bridge(org.geneontology.rules.util.Bridge) ModelFactory(org.apache.jena.rdf.model.ModelFactory) IRI(org.semanticweb.owlapi.model.IRI) Collectors.toSet(java.util.stream.Collectors.toSet) Explanation(org.geneontology.rules.engine.Explanation) Node(org.apache.jena.graph.Node) InconsistentOntologyException(org.semanticweb.owlapi.reasoner.InconsistentOntologyException) ConjunctiveExpression(org.geneontology.minerva.legacy.sparql.GPADData.ConjunctiveExpression) Triple(org.apache.jena.graph.Triple) Model(org.apache.jena.rdf.model.Model)

Aggregations

IOException (java.io.IOException)1 StandardCharsets (java.nio.charset.StandardCharsets)1 java.util (java.util)1 Function (java.util.function.Function)1 Collectors (java.util.stream.Collectors)1 Collectors.mapping (java.util.stream.Collectors.mapping)1 Collectors.toSet (java.util.stream.Collectors.toSet)1 Stream (java.util.stream.Stream)1 IOUtils (org.apache.commons.io.IOUtils)1 Pair (org.apache.commons.lang3.tuple.Pair)1 Node (org.apache.jena.graph.Node)1 NodeFactory (org.apache.jena.graph.NodeFactory)1 Triple (org.apache.jena.graph.Triple)1 org.apache.jena.query (org.apache.jena.query)1 Literal (org.apache.jena.rdf.model.Literal)1 Model (org.apache.jena.rdf.model.Model)1 ModelFactory (org.apache.jena.rdf.model.ModelFactory)1 Resource (org.apache.jena.rdf.model.Resource)1 Var (org.apache.jena.sparql.core.Var)1 Binding (org.apache.jena.sparql.engine.binding.Binding)1