Use of org.geneontology.rules.engine.Explanation in project minerva by geneontology.
From the class GPADSPARQLExport, method getGPAD.
/**
 * Builds the GPAD records for a model from the facts held in a reasoner working memory.
 *
 * <p>This is a bit convoluted in order to minimize redundant queries, for performance reasons:
 * candidate records are collected with a single SPARQL query, every statement that needs
 * justification is explained once up front, and evidence is looked up once for all facts that
 * appear in any explanation. The final records are then assembled from these in-memory maps.
 *
 * @param wm       working memory whose facts are loaded into a Jena model and which is asked to
 *                 explain each candidate statement
 * @param modelIRI IRI of the model; its CURIE form is passed on as the model ID when collecting
 *                 evidence
 * @return the GPAD records that survive the regulation, evidence and root-term filters; possibly
 *         empty
 * @throws InconsistentOntologyException if {@code isConsistent} rejects the model built from the
 *                                       working memory facts
 */
public Set<GPADData> getGPAD(WorkingMemory wm, IRI modelIRI) throws InconsistentOntologyException {
    Model model = ModelFactory.createDefaultModel();
    model.add(JavaConverters.setAsJavaSetConverter(wm.facts()).asJava().stream()
            .map(t -> model.asStatement(Bridge.jenaFromTriple(t)))
            .collect(Collectors.toList()));
    if (!isConsistent(model)) {
        throw new InconsistentOntologyException();
    }
    Map<String, String> modelLevelAnnotations = getModelAnnotations(model);
    // The model ID is taken from the caller-supplied IRI rather than looked up in the model:
    // picking an owl:Ontology resource from the model is unpredictable if there is more than one.
    String modelID = curieHandler.getCuri(modelIRI);
    Set<GPADData> annotations = new HashSet<>();

    /* The first step of constructing GPAD records is to construct candidate/basic GPAD records by running gpad-basic.rq. */
    Set<BasicGPADData> basicAnnotations = new HashSet<>();
    // try-with-resources so the query execution is released even if reading a solution throws.
    try (QueryExecution qe = QueryExecutionFactory.create(mainQuery, model)) {
        ResultSet results = qe.execSelect();
        while (results.hasNext()) {
            QuerySolution qs = results.next();
            BasicGPADData basicGPADData = new BasicGPADData(
                    qs.getResource("pr").asNode(),
                    IRI.create(qs.getResource("pr_type").getURI()),
                    IRI.create(qs.getResource("rel").getURI()),
                    qs.getResource("target").asNode(),
                    IRI.create(qs.getResource("target_type").getURI()));
            /* See whether the query answer contains not-null blank nodes, which are only set if the matching subgraph
             * contains the property ComplementOf. If we see such cases, we set the operator field as NOT so that NOT value
             * can be printed in GPAD. */
            if (qs.getResource("blank_comp") != null) {
                basicGPADData.setOperator(GPADOperatorStatus.NOT);
            }
            basicAnnotations.add(basicGPADData);
        }
    }

    /* The bindings of ?pr_type, ?rel, ?target_type are candidate mappings or values for the final GPAD records
     * (i.e. not every mapping is used for building the final records of GPAD file; many of them are filtered out later).
     * The mappings are
     * ?pr_type: DB Object ID (2nd in GPAD), ?rel: Qualifier(3rd), ?target_type: GO ID(4th)
     * The rest of fields in GPAD are then constructed by joining the candidate mappings with mappings describing evidences and so on.
     * If the output of this exporter (i.e. GPAD files) does not contain the values you expect,
     * dump the "QuerySolution qs" variable in the loop above and see whether they are included in the dump. */
    Set<AnnotationExtension> possibleExtensions = possibleExtensions(basicAnnotations, model);

    // Every candidate annotation edge and every candidate extension edge is explained exactly once.
    Set<Triple> statementsToExplain = new HashSet<>();
    basicAnnotations.forEach(ba -> statementsToExplain.add(Triple.create(ba.getObjectNode(), NodeFactory.createURI(ba.getQualifier().toString()), ba.getOntologyClassNode())));
    possibleExtensions.forEach(ae -> statementsToExplain.add(ae.getTriple()));
    Map<Triple, Set<Explanation>> allExplanations = statementsToExplain.stream()
            .collect(Collectors.toMap(Function.identity(), s -> toJava(wm.explain(Bridge.tripleFromJena(s)))));

    // Evidence is fetched in one call for the union of all facts used by any explanation.
    Map<Triple, Set<GPADEvidence>> allEvidences = evidencesForFacts(
            allExplanations.values().stream()
                    .flatMap(es -> es.stream())
                    .flatMap(e -> toJava(e.facts()).stream().map(t -> Bridge.jenaFromTriple(t)))
                    .collect(toSet()),
            model, modelID, modelLevelAnnotations);

    // Objects that have at least one function-relation annotation to a term other than MF.
    Set<IRI> gpsWithAnyMFNotRootMF = basicAnnotations.stream()
            .filter(a -> functionRelations.contains(a.getQualifier().toString()))
            .filter(a -> !a.getOntologyClass().toString().equals(MF))
            .map(a -> a.getObject())
            .collect(toSet());
    Map<Node, Set<IRI>> nodesToOntologyClasses = basicAnnotations.stream()
            .collect(Collectors.groupingBy(BasicGPADData::getObjectNode, mapping(BasicGPADData::getOntologyClass, toSet())));

    for (BasicGPADData annotation : basicAnnotations) {
        // Skip this annotation when its term appears in the regulators entry of any term
        // annotated on the same object node.
        Set<IRI> termsRegulatedByAnnotationsForThisGPNode = nodesToOntologyClasses.get(annotation.getObjectNode()).stream()
                .flatMap(term -> regulators.getOrDefault(term, Collections.emptySet()).stream())
                .collect(toSet());
        boolean regulationViolation = termsRegulatedByAnnotationsForThisGPNode.contains(annotation.getOntologyClass());
        if (regulationViolation) {
            continue;
        }
        Triple annotationTriple = Triple.create(annotation.getObjectNode(), NodeFactory.createURI(annotation.getQualifier().toString()), annotation.getOntologyClassNode());
        for (Explanation explanation : allExplanations.get(annotationTriple)) {
            Set<Triple> requiredFacts = toJava(explanation.facts()).stream()
                    .map(t -> Bridge.jenaFromTriple(t))
                    .collect(toSet());
            // Every statement in the explanation must have at least one evidence, unless the statement is a class assertion.
            // A fact with no entry in the evidence map counts as having no evidence (same lookup as used below).
            boolean everyFactHasEvidence = requiredFacts.stream()
                    .filter(t -> !t.getPredicate().getURI().equals(RDF.type.getURI()))
                    .allMatch(f -> !allEvidences.getOrDefault(f, Collections.emptySet()).isEmpty());
            if (!everyFactHasEvidence) {
                continue;
            }
            // The evidence used for the annotation must be on an edge to or from the target node
            Stream<GPADEvidence> annotationEvidences = requiredFacts.stream()
                    .filter(f -> (f.getSubject().equals(annotation.getOntologyClassNode()) || f.getObject().equals(annotation.getOntologyClassNode())))
                    .flatMap(f -> allEvidences.getOrDefault(f, Collections.emptySet()).stream());
            annotationEvidences.forEach(currentEvidence -> {
                String reference = currentEvidence.getReference();
                // An extension is kept only if it starts at the annotation's target node, does not
                // point back at the annotated object, and has an explanation in which every fact
                // carries evidence with the same reference as the annotation evidence.
                Set<ConjunctiveExpression> goodExtensions = new HashSet<>();
                for (AnnotationExtension extension : possibleExtensions) {
                    if (extension.getTriple().getSubject().equals(annotation.getOntologyClassNode()) && !(extension.getTriple().getObject().equals(annotation.getObjectNode()))) {
                        for (Explanation expl : allExplanations.get(extension.getTriple())) {
                            boolean allFactsOfExplanationHaveRefMatchingAnnotation = toJava(expl.facts()).stream()
                                    .map(fact -> allEvidences.getOrDefault(Bridge.jenaFromTriple(fact), Collections.emptySet()))
                                    .allMatch(evidenceSet -> evidenceSet.stream().anyMatch(ev -> ev.getReference().equals(reference)));
                            if (allFactsOfExplanationHaveRefMatchingAnnotation) {
                                goodExtensions.add(new DefaultConjunctiveExpression(IRI.create(extension.getTriple().getPredicate().getURI()), extension.getValueType()));
                            }
                        }
                    }
                }
                // Handle special case of EMAPA; don't include Uberon extensions
                final boolean isMouseExtension = goodExtensions.stream().anyMatch(e -> e.getFiller().toString().startsWith(EMAPA_NAMESPACE));
                if (isMouseExtension) {
                    goodExtensions.removeIf(e -> e.getFiller().toString().startsWith(UBERON_NAMESPACE));
                }
                // Annotations to a root term are only emitted when the evidence equals ND.
                final boolean rootViolation = rootTerms.contains(annotation.getOntologyClass().toString())
                        && !ND.equals(currentEvidence.getEvidence().toString());
                // An annotation to MF itself is dropped when the same object also has another function annotation.
                final boolean rootMFWithOtherMF = annotation.getOntologyClass().toString().equals(MF) && gpsWithAnyMFNotRootMF.contains(annotation.getObject());
                if (!rootViolation && !rootMFWithOtherMF) {
                    DefaultGPADData defaultGPADData = new DefaultGPADData(annotation.getObject(), annotation.getQualifier(), annotation.getOntologyClass(), goodExtensions, reference, currentEvidence.getEvidence(), currentEvidence.getWithOrFrom(), Optional.empty(), currentEvidence.getModificationDate(), currentEvidence.getAssignedBy(), currentEvidence.getAnnotations());
                    defaultGPADData.setOperator(annotation.getOperator());
                    annotations.add(defaultGPADData);
                }
            });
        }
    }
    return annotations;
}
Aggregations