use of org.geneontology.rules.engine.WorkingMemory in project minerva by geneontology.
the class OperationsImpl method exportLegacy.
/**
 * Exports a model in one of the supported legacy formats and stores the result in
 * {@code response.data.exportModel}.
 * <p>
 * Two formats are recognised: {@code "gpad"} and {@code "explanations"}. For {@code "gpad"},
 * an inconsistent model is not propagated as an exception; instead the response is marked
 * as an error with an explanatory message.
 *
 * @param response the batch response to populate
 * @param model    the model to export; only its model id is read here
 * @param format   the requested export format
 * @param userId   not used by this method
 * @throws IOException if {@code format} is neither {@code "gpad"} nor {@code "explanations"}
 *                     (including {@code null})
 * @throws OWLOntologyCreationException declared for the calls made while building the export
 * @throws UnknownIdentifierException   declared for the calls made while building the export
 */
private void exportLegacy(M3BatchResponse response, ModelContainer model, String format, String userId) throws IOException, OWLOntologyCreationException, UnknownIdentifierException {
    final boolean wantsGpad = "gpad".equals(format);
    final boolean wantsExplanations = !wantsGpad && "explanations".equals(format);

    // Reject anything else before touching the response.
    if (!wantsGpad && !wantsExplanations) {
        throw new IOException("Unknown export format: " + format);
    }

    initMetaResponse(response);

    if (wantsExplanations) {
        response.data.exportModel = ExportExplanation.exportExplanation(
                m3.createInferredModel(model.getModelId()),
                m3.getGolego_repo(),
                m3.getLegacyRelationShorthandIndex());
        return;
    }

    // GPAD export: built from the canonical inferred model.
    try {
        GPADSPARQLExport gpadExporter = new GPADSPARQLExport(
                curieHandler,
                m3.getLegacyRelationShorthandIndex(),
                m3.getTboxShorthandIndex(),
                m3.getGolego_repo().regulatorsToRegulated);
        WorkingMemory inferred = m3.createCanonicalInferredModel(model.getModelId());
        response.data.exportModel = gpadExporter.exportGPAD(inferred, model.getModelId());
    } catch (InconsistentOntologyException inconsistent) {
        // Reported to the client through the response rather than rethrown.
        response.messageType = MinervaResponse.MESSAGE_TYPE_ERROR;
        response.message = "The model is inconsistent; a GPAD cannot be created.";
    }
}
use of org.geneontology.rules.engine.WorkingMemory in project minerva by geneontology.
the class GPADSPARQLTest method testGPADOutputWithNegation.
/**
 * Checks the GPAD export of a model whose annotations are expected to be negated.
 * <p>
 * Input: {@code /59d1072300000074.ttl}, described by the original author as the OWL dump from
 * http://noctua-dev.berkeleybop.org/editor/graph/gomodel:59d1072300000074
 * <p>
 * What this test asserts:
 * 1. the export has at least 7 lines (1 header plus 6 rows);
 * 2. every line with at least three tab-separated columns contains "|NOT" in its third
 * (qualifier) column. Lines with fewer columns, such as the header, are skipped.
 * <p>
 * Notes on the GPAD contents carried over from the original description (not asserted here):
 * 1. the number of entries in the GPAD output from this owl dump should be 6, not 7 (although
 * there are 7 individuals/boxes) because the edge/relationship "molecular_function" is a
 * trivial one, which is supposed to be removed from the output.
 * 2. the 4th column consists of the GO IDs attributed to the DB object ID
 * (these should be GO:0005634, GO:0007267, GO:0007507, GO:0016301).
 * 3. the 2nd column holds the rest of the entities in the noctua screen,
 * i.e. S000028630 (YFR032C-B Scer) or S000004724 (SHH3 Scer).
 *
 * @throws Exception if the model cannot be read, reasoned over or exported
 */
@Test
public void testGPADOutputWithNegation() throws Exception {
    Model model = ModelFactory.createDefaultModel();
    model.read(this.getClass().getResourceAsStream("/59d1072300000074.ttl"), "", "ttl");
    Set<Triple> triples = model.listStatements().toList().stream()
            .map(s -> Bridge.tripleFromJena(s.asTriple()))
            .collect(Collectors.toSet());
    WorkingMemory mem = arachne.processTriples(JavaConverters.asScalaSetConverter(triples).asScala());
    String gpad = exporter.exportGPAD(mem, IRI.create("http://test.org"));

    /* Check the number of rows in the GPAD output: 1 for the header and 6 for the rest of
     * the rows, so the length should be 7 or 8. */
    String[] gpadOutputArr = gpad.split("\n", -1);
    Assert.assertTrue("Should produce annotations", gpadOutputArr.length >= 1 + 6);

    /* The sample-answer file src/test/resources/59d1072300000074.gpad is NOT compared against
     * the output here. It used to be read into a local that was never used; that dead read has
     * been removed so the test no longer suggests a comparison it does not perform. */
    for (String gpadOutputRow : gpadOutputArr) {
        String[] gpadRowArr = gpadOutputRow.split("\t");
        /* Skip the header (and any line without a qualifier column); every remaining row
         * must contain NOT in its qualifier. */
        if (gpadRowArr.length > 2) {
            Assert.assertTrue(gpadRowArr[2].contains("|NOT"));
        }
    }
}
use of org.geneontology.rules.engine.WorkingMemory in project minerva by geneontology.
the class GPADSPARQLTest method testFilterAnnotationsToRegulatedProcess.
/**
 * Verifies that, with GO_0030511 registered as a regulator of GO_0007179, the export for
 * {@code /test_filter_regulated_process.ttl} contains an annotation of the gene to the
 * regulator term and none to the regulated term.
 *
 * @throws Exception if the model cannot be read, reasoned over or exported
 */
@Test
public void testFilterAnnotationsToRegulatedProcess() throws Exception {
    final IRI regulator = IRI.create("http://purl.obolibrary.org/obo/GO_0030511");
    final IRI regulated = IRI.create("http://purl.obolibrary.org/obo/GO_0007179");
    final IRI gene = IRI.create("http://identifiers.org/mgi/MGI:2148811");

    // Exporter configured with a single regulator -> regulated mapping.
    HashMap<IRI, Set<IRI>> regulatorIndex = new HashMap<>();
    regulatorIndex.put(regulator, Collections.singleton(regulated));
    GPADSPARQLExport exporter = new GPADSPARQLExport(
            DefaultCurieHandler.getDefaultHandler(),
            new HashMap<IRI, String>(),
            new HashMap<IRI, String>(),
            regulatorIndex);

    // Load the fixture and run it through the rule engine.
    Model fixture = ModelFactory.createDefaultModel();
    fixture.read(this.getClass().getResourceAsStream("/test_filter_regulated_process.ttl"), "", "ttl");
    Set<Triple> fixtureTriples = fixture.listStatements().toList().stream()
            .map(statement -> Bridge.tripleFromJena(statement.asTriple()))
            .collect(Collectors.toSet());
    WorkingMemory reasoned = arachne.processTriples(JavaConverters.asScalaSetConverter(fixtureTriples).asScala());

    Set<GPADData> annotations = exporter.getGPAD(reasoned, IRI.create("http://test.org"));

    boolean annotatedToRegulator = annotations.stream()
            .anyMatch(a -> a.getObject().equals(gene) && a.getOntologyClass().equals(regulator));
    boolean annotatedToRegulated = annotations.stream()
            .anyMatch(a -> a.getObject().equals(gene) && a.getOntologyClass().equals(regulated));
    Assert.assertTrue(annotatedToRegulator);
    Assert.assertFalse(annotatedToRegulated);
}
use of org.geneontology.rules.engine.WorkingMemory in project minerva by geneontology.
the class GPADSPARQLTest method testFilterRootMFWhenOtherMF.
/**
 * Exercises the root-term handling of the export on three fixtures:
 * the first must yield no annotation of its gene to GO_0003674; the second must yield
 * annotations of its gene to GO_0003674 and GO_0008150; the third must yield annotations of
 * its gene to GO_0003674, GO_0008150 and GO_0005575.
 *
 * @throws Exception if a model cannot be read, reasoned over or exported
 */
@Test
public void testFilterRootMFWhenOtherMF() throws Exception {
    final IRI rootMF = IRI.create("http://purl.obolibrary.org/obo/GO_0003674");
    final IRI rootBP = IRI.create("http://purl.obolibrary.org/obo/GO_0008150");
    final IRI rootCC = IRI.create("http://purl.obolibrary.org/obo/GO_0005575");

    // Fixture 1: root MF must be absent for this gene.
    final Set<GPADData> firstResult = gpadAnnotationsForResource("/test_root_mf_filter.ttl");
    final IRI firstGene = IRI.create("http://identifiers.org/mgi/MGI:2153470");
    Assert.assertFalse(firstResult.stream()
            .anyMatch(a -> a.getObject().equals(firstGene) && a.getOntologyClass().equals(rootMF)));

    // Fixture 2: root MF and root BP must both be present.
    final Set<GPADData> secondResult = gpadAnnotationsForResource("/test_root_mf_filter2.ttl");
    final IRI secondGene = IRI.create("http://identifiers.org/mgi/MGI:98392");
    Assert.assertTrue(secondResult.stream()
            .anyMatch(a -> a.getObject().equals(secondGene) && a.getOntologyClass().equals(rootMF)));
    Assert.assertTrue(secondResult.stream()
            .anyMatch(a -> a.getObject().equals(secondGene) && a.getOntologyClass().equals(rootBP)));

    // Fixture 3: all three root terms must be present.
    final Set<GPADData> thirdResult = gpadAnnotationsForResource("/test_root_mf_filter3.ttl");
    final IRI thirdGene = IRI.create("http://identifiers.org/sgd/S000002650");
    Assert.assertTrue(thirdResult.stream()
            .anyMatch(a -> a.getObject().equals(thirdGene) && a.getOntologyClass().equals(rootMF)));
    Assert.assertTrue(thirdResult.stream()
            .anyMatch(a -> a.getObject().equals(thirdGene) && a.getOntologyClass().equals(rootBP)));
    Assert.assertTrue(thirdResult.stream()
            .anyMatch(a -> a.getObject().equals(thirdGene) && a.getOntologyClass().equals(rootCC)));
}

/**
 * Reads a Turtle fixture from the classpath, runs its triples through the rule engine and
 * returns the GPAD annotations exported for it under the model IRI {@code http://test.org}.
 *
 * @param resourcePath classpath location of the Turtle file
 * @return the annotations produced by the exporter
 * @throws Exception if the fixture cannot be read, reasoned over or exported
 */
private Set<GPADData> gpadAnnotationsForResource(String resourcePath) throws Exception {
    Model fixture = ModelFactory.createDefaultModel();
    fixture.read(this.getClass().getResourceAsStream(resourcePath), "", "ttl");
    Set<Triple> fixtureTriples = fixture.listStatements().toList().stream()
            .map(statement -> Bridge.tripleFromJena(statement.asTriple()))
            .collect(Collectors.toSet());
    WorkingMemory reasoned = arachne.processTriples(JavaConverters.asScalaSetConverter(fixtureTriples).asScala());
    return exporter.getGPAD(reasoned, IRI.create("http://test.org"));
}
use of org.geneontology.rules.engine.WorkingMemory in project minerva by geneontology.
the class GPADSPARQLExport method getGPAD.
/**
 * Builds the GPAD annotation records for a reasoned model.
 * <p>
 * The facts in {@code wm} are copied into a Jena model, which is checked for consistency and
 * then queried with the main query (gpad-basic.rq) to obtain candidate annotations. Each
 * candidate is expanded into one record per supporting evidence. The following are dropped:
 * <ul>
 * <li>annotations to a term that is regulated by another term annotated on the same node;</li>
 * <li>explanations in which a fact other than a class assertion has no evidence;</li>
 * <li>annotations to a root term whose evidence is not {@code ND};</li>
 * <li>annotations to the {@code MF} term for objects that also have a function annotation to
 * some other term.</li>
 * </ul>
 * This is a bit convoluted in order to minimize redundant queries, for performance reasons.
 *
 * @param wm       working memory holding the facts of the model and able to explain them
 * @param modelIRI IRI of the model; its CURIE is passed on when evidence is collected
 * @return the GPAD records that survive the filters above (possibly empty)
 * @throws InconsistentOntologyException if the model built from {@code wm} is not consistent
 */
public Set<GPADData> getGPAD(WorkingMemory wm, IRI modelIRI) throws InconsistentOntologyException {
    Model model = ModelFactory.createDefaultModel();
    model.add(JavaConverters.setAsJavaSetConverter(wm.facts()).asJava().stream()
            .map(t -> model.asStatement(Bridge.jenaFromTriple(t)))
            .collect(Collectors.toList()));
    if (!isConsistent(model)) {
        throw new InconsistentOntologyException();
    }
    Map<String, String> modelLevelAnnotations = getModelAnnotations(model);
    Set<GPADData> annotations = new HashSet<>();
    // The model ID comes from the caller-supplied IRI; looking it up in the graph is
    // unpredictable if more than one ontology resource is present.
    String modelID = curieHandler.getCuri(modelIRI);

    /* The first step of constructing GPAD records is to construct candidate/basic GPAD records by running gpad-basic.rq. */
    Set<BasicGPADData> basicAnnotations = new HashSet<>();
    // try-with-resources so the query execution is released even if iteration fails.
    try (QueryExecution qe = QueryExecutionFactory.create(mainQuery, model)) {
        ResultSet results = qe.execSelect();
        while (results.hasNext()) {
            QuerySolution qs = results.next();
            BasicGPADData basicGPADData = new BasicGPADData(
                    qs.getResource("pr").asNode(),
                    IRI.create(qs.getResource("pr_type").getURI()),
                    IRI.create(qs.getResource("rel").getURI()),
                    qs.getResource("target").asNode(),
                    IRI.create(qs.getResource("target_type").getURI()));
            /* See whether the query answer contains not-null blank nodes, which are only set if the matching subgraph
             * contains the property ComplementOf. If we see such cases, we set the operator field as NOT so that NOT value
             * can be printed in GPAD. */
            if (qs.getResource("blank_comp") != null) {
                basicGPADData.setOperator(GPADOperatorStatus.NOT);
            }
            basicAnnotations.add(basicGPADData);
        }
    }

    /* The bindings of ?pr_type, ?rel, ?target_type are candidate mappings or values for the final GPAD records
     * (i.e. not every mapping is used for building the final records of GPAD file; many of them are filtered out later).
     * The mappings are
     * ?pr_type: DB Object ID (2nd in GPAD), ?rel: Qualifier(3rd), ?target_type: GO ID(4th)
     * The rest of fields in GPAD are then constructed by joining the candidate mappings with mappings describing evidences and so on.
     * If the output of this exporter (i.e. GPAD files) does not contain the values you expect,
     * dump the above "QuerySolution qs" variable and see whether they are included in the dump. */
    Set<AnnotationExtension> possibleExtensions = possibleExtensions(basicAnnotations, model);

    // Explain every candidate annotation and every possible extension exactly once.
    Set<Triple> statementsToExplain = new HashSet<>();
    basicAnnotations.forEach(ba -> statementsToExplain.add(Triple.create(ba.getObjectNode(), NodeFactory.createURI(ba.getQualifier().toString()), ba.getOntologyClassNode())));
    possibleExtensions.forEach(ae -> statementsToExplain.add(ae.getTriple()));
    Map<Triple, Set<Explanation>> allExplanations = statementsToExplain.stream()
            .collect(Collectors.toMap(Function.identity(), s -> toJava(wm.explain(Bridge.tripleFromJena(s)))));

    // Collect the evidence for all facts used by any explanation in a single call.
    Map<Triple, Set<GPADEvidence>> allEvidences = evidencesForFacts(
            allExplanations.values().stream()
                    .flatMap(es -> es.stream())
                    .flatMap(e -> toJava(e.facts()).stream().map(t -> Bridge.jenaFromTriple(t)))
                    .collect(toSet()),
            model, modelID, modelLevelAnnotations);

    // Objects that have a function annotation to something other than the MF term.
    Set<IRI> gpsWithAnyMFNotRootMF = basicAnnotations.stream()
            .filter(a -> functionRelations.contains(a.getQualifier().toString()))
            .filter(a -> !a.getOntologyClass().toString().equals(MF))
            .map(a -> a.getObject())
            .collect(toSet());
    Map<Node, Set<IRI>> nodesToOntologyClasses = basicAnnotations.stream()
            .collect(Collectors.groupingBy(BasicGPADData::getObjectNode, mapping(BasicGPADData::getOntologyClass, toSet())));

    for (BasicGPADData annotation : basicAnnotations) {
        // Skip an annotation whose term is regulated by another term annotated on the same node.
        Set<IRI> termsRegulatedByAnnotationsForThisGPNode = nodesToOntologyClasses.get(annotation.getObjectNode()).stream()
                .flatMap(term -> regulators.getOrDefault(term, Collections.emptySet()).stream())
                .collect(toSet());
        boolean regulationViolation = termsRegulatedByAnnotationsForThisGPNode.contains(annotation.getOntologyClass());
        if (regulationViolation) {
            continue;
        }
        for (Explanation explanation : allExplanations.get(Triple.create(annotation.getObjectNode(), NodeFactory.createURI(annotation.getQualifier().toString()), annotation.getOntologyClassNode()))) {
            Set<Triple> requiredFacts = toJava(explanation.facts()).stream().map(t -> Bridge.jenaFromTriple(t)).collect(toSet());
            // Every statement in the explanation must have at least one evidence, unless the statement is a class assertion.
            // A fact with no entry in the evidence map counts as having no evidence (instead of raising a NullPointerException).
            boolean everyFactHasEvidence = requiredFacts.stream()
                    .filter(t -> !t.getPredicate().getURI().equals(RDF.type.getURI()))
                    .allMatch(f -> !allEvidences.getOrDefault(f, Collections.emptySet()).isEmpty());
            if (everyFactHasEvidence) {
                // The evidence used for the annotation must be on an edge to or from the target node
                Stream<GPADEvidence> annotationEvidences = requiredFacts.stream()
                        .filter(f -> (f.getSubject().equals(annotation.getOntologyClassNode()) || f.getObject().equals(annotation.getOntologyClassNode())))
                        .flatMap(f -> allEvidences.getOrDefault(f, Collections.emptySet()).stream());
                annotationEvidences.forEach(currentEvidence -> {
                    String reference = currentEvidence.getReference();
                    // Keep only extensions whose explanation is fully backed by evidence with the same reference.
                    Set<ConjunctiveExpression> goodExtensions = new HashSet<>();
                    for (AnnotationExtension extension : possibleExtensions) {
                        if (extension.getTriple().getSubject().equals(annotation.getOntologyClassNode()) && !(extension.getTriple().getObject().equals(annotation.getObjectNode()))) {
                            for (Explanation expl : allExplanations.get(extension.getTriple())) {
                                boolean allFactsOfExplanationHaveRefMatchingAnnotation = toJava(expl.facts()).stream()
                                        .map(fact -> allEvidences.getOrDefault(Bridge.jenaFromTriple(fact), Collections.emptySet()))
                                        .allMatch(evidenceSet -> evidenceSet.stream().anyMatch(ev -> ev.getReference().equals(reference)));
                                if (allFactsOfExplanationHaveRefMatchingAnnotation) {
                                    goodExtensions.add(new DefaultConjunctiveExpression(IRI.create(extension.getTriple().getPredicate().getURI()), extension.getValueType()));
                                }
                            }
                        }
                    }
                    // Handle special case of EMAPA; don't include Uberon extensions
                    final boolean isMouseExtension = goodExtensions.stream().anyMatch(e -> e.getFiller().toString().startsWith(EMAPA_NAMESPACE));
                    if (isMouseExtension) {
                        goodExtensions.removeIf(e -> e.getFiller().toString().startsWith(UBERON_NAMESPACE));
                    }
                    // A root-term annotation is only acceptable with ND evidence.
                    final boolean rootViolation = rootTerms.contains(annotation.getOntologyClass().toString())
                            && !ND.equals(currentEvidence.getEvidence().toString());
                    final boolean rootMFWithOtherMF = annotation.getOntologyClass().toString().equals(MF) && gpsWithAnyMFNotRootMF.contains(annotation.getObject());
                    if (!rootViolation && !rootMFWithOtherMF) {
                        DefaultGPADData defaultGPADData = new DefaultGPADData(annotation.getObject(), annotation.getQualifier(), annotation.getOntologyClass(), goodExtensions, reference, currentEvidence.getEvidence(), currentEvidence.getWithOrFrom(), Optional.empty(), currentEvidence.getModificationDate(), currentEvidence.getAssignedBy(), currentEvidence.getAnnotations());
                        defaultGPADData.setOperator(annotation.getOperator());
                        annotations.add(defaultGPADData);
                    }
                });
            }
        }
    }
    return annotations;
}
Aggregations