use of org.geneontology.minerva.validation.pipeline.BatchPipelineValidationReport in project minerva by geneontology.
the class CommandLineInterface method validateGoCams.
/**
 * Validates a collection of GO-CAM models and writes a set of reports.
 * <p>
 * Example command line:
 * <pre>
 * --validate-go-cams
 * -i /GitHub/GO_Shapes/test_ttl/go_cams/should_pass/
 * -c ./catalog-no-import.xml
 * </pre>
 * The models are loaded into a blazegraph journal (unless {@code input} already is one), checked
 * for OWL consistency with the "arachne" inference provider and, when {@code checkShex} is set,
 * checked against the ShEx schema. The following files are written into {@code outputFolder}:
 * {@code main_report.txt}, {@code explanations.txt}, {@code activity_report.txt} and
 * {@code gorules_report.json}.
 * <p>
 * NOTE(review): per-model rows are only appended to {@code main_report.txt} when
 * {@code checkShex} is true; with shex disabled that file contains just the header.
 *
 * @param input                either a blazegraph journal (name ending in {@code .jnl}) or a file /
 *                             directory of {@code ttl}/{@code owl} models; required, the process
 *                             exits with -1 when null
 * @param outputFolder         folder the reports are written to; required, the process exits with
 *                             -1 when null. A trailing "/" is appended when missing
 * @param ontologyIRI          IRI of the tbox ontology to load
 * @param catalog              optional catalog xml used to map ontology IRIs; when null all IRIs
 *                             are resolved directly
 * @param modelIdPrefix        IRI prefix of model ids, mapped to {@code modelIdcurie}
 * @param modelIdcurie         curie prefix that {@code modelIdPrefix} is mapped to
 * @param shexpath             path of the ShEx schema; when null the schema is downloaded from the
 *                             go-shapes repository to {@code ./go-cam-schema.shex}
 * @param shapemappath         path of the shape map; when null it is downloaded from the go-shapes
 *                             repository to {@code ./go-cam-shapes.shapeMap}
 * @param travisMode           when true the process exits with -1 as soon as a model does not
 *                             behave as expected (see {@code shouldFail})
 * @param shouldFail           only relevant in travis mode: when true, invalid models are the
 *                             expected outcome and a shex-conformant model causes the exit instead
 * @param checkShex            whether the shex validator is active
 * @param go_lego_journal_file handed to the model manager unchanged
 * @param run_reasoner_report  whether the additional OWL reasoner report is produced
 * @throws IOException                  if the shex validator cannot be constructed, or from the
 *                                      model manager
 * @throws OWLOntologyCreationException declared for the model manager
 */
public static void validateGoCams(String input, String outputFolder, String ontologyIRI, String catalog, String modelIdPrefix, String modelIdcurie, String shexpath, String shapemappath, boolean travisMode, boolean shouldFail, boolean checkShex, String go_lego_journal_file, boolean run_reasoner_report) throws OWLOntologyCreationException, IOException {
    LOGGER.setLevel(Level.INFO);
    String inputDB = "blazegraph.jnl";
    String shexFileUrl = "https://raw.githubusercontent.com/geneontology/go-shapes/master/shapes/go-cam-shapes.shex";
    String goshapemapFileUrl = "https://raw.githubusercontent.com/geneontology/go-shapes/master/shapes/go-cam-shapes.shapeMap";
    CurieMappings localMappings = new CurieMappings.SimpleCurieMappings(Collections.singletonMap(modelIdcurie, modelIdPrefix));
    CurieHandler curieHandler = new MappedCurieHandler(DefaultCurieHandler.loadDefaultMappings(), localMappings);
    // filled from a parallel stream below, so it has to be a thread-safe map
    final Map<String, String> modelid_filename = Collections.synchronizedMap(new HashMap<String, String>());
    if (outputFolder == null) {
        LOGGER.error("please specify an output folder with -r ");
        System.exit(-1);
    } else if (!outputFolder.endsWith("/")) {
        outputFolder += "/";
    }
    if (input == null) {
        LOGGER.error("please provide an input file - either a directory of ttl files or a blazegraph journal");
        System.exit(-1);
    }
    LOGGER.info("loading tbox ontology: " + ontologyIRI);
    OWLOntologyManager ontman = OWLManager.createOWLOntologyManager();
    if (catalog != null) {
        LOGGER.info("using catalog: " + catalog);
        try {
            ontman.setIRIMappers(Sets.newHashSet(new CatalogXmlIRIMapper(catalog)));
        } catch (IOException e) {
            // best effort: without the catalog, IRIs are resolved directly
            e.printStackTrace();
        }
    } else {
        LOGGER.info("no catalog, resolving all ontology uris directly");
    }
    OWLOntology tbox_ontology = null;
    try {
        tbox_ontology = ontman.loadOntology(IRI.create(ontologyIRI));
        LOGGER.info("tbox ontology axioms loaded: " + tbox_ontology.getAxiomCount());
    } catch (OWLOntologyCreationException e1) {
        // NOTE(review): the model manager below is then created with a null tbox - confirm that this is intended
        e1.printStackTrace();
    }
    final boolean inputIsJournal = input.endsWith(".jnl");
    final File inputFile = new File(input);
    if (inputIsJournal) {
        // either load directly from existing journal
        inputDB = input;
    } else if (inputFile.exists()) {
        // or make sure that the default journal is cleared out and ready
        File bgdb = new File(inputDB);
        if (bgdb.exists()) {
            bgdb.delete();
        }
    }
    // make the manager
    LOGGER.info("Setting up model manager and initializing rules for Arachne reasoner");
    UndoAwareMolecularModelManager m3 = new UndoAwareMolecularModelManager(tbox_ontology, curieHandler, modelIdPrefix, inputDB, null, go_lego_journal_file, true);
    // if provided a file or directory as input, load the model files into the manager
    if (inputFile.exists() && !inputIsJournal) {
        if (inputFile.isDirectory()) {
            LOGGER.info("Loading models from " + inputFile.getAbsolutePath());
            // shared between the worker threads of the parallel stream, hence synchronized
            final Set<String> model_iris = Collections.synchronizedSet(new HashSet<String>());
            FileUtils.listFiles(inputFile, null, true).parallelStream().forEach(file -> {
                if (file.getName().endsWith(".ttl") || file.getName().endsWith("owl")) {
                    try {
                        String modeluri = m3.importModelToDatabase(file, true);
                        if (modeluri == null) {
                            LOGGER.error("Null model IRI: " + modeluri + " file: " + file);
                        } else if (!model_iris.add(modeluri)) {
                            LOGGER.error("Multiple models with same IRI: " + modeluri + " file: " + file + " file: " + modelid_filename.get(modeluri));
                        } else {
                            modelid_filename.put(modeluri, file.getName());
                        }
                    } catch (OWLOntologyCreationException | RepositoryException | RDFParseException | RDFHandlerException | IOException e) {
                        // a single unreadable file must not stop the remaining imports
                        e.printStackTrace();
                    }
                }
            });
        } else {
            // just load the one provided
            LOGGER.info("Loading " + inputFile);
            try {
                m3.importModelToDatabase(inputFile, true);
            } catch (OWLOntologyCreationException | RepositoryException | RDFParseException | RDFHandlerException | IOException e) {
                e.printStackTrace();
            }
        }
        LOGGER.info("loaded files into blazegraph journal: " + input);
    }
    // now set up shex validator
    if (shexpath == null) {
        // fall back on downloading from shapes repo
        try {
            URL shex_schema_url = new URL(shexFileUrl);
            shexpath = "./go-cam-schema.shex";
            File shex_schema_file = new File(shexpath);
            org.apache.commons.io.FileUtils.copyURLToFile(shex_schema_url, shex_schema_file);
            System.err.println("-s .No shex schema provided, using: " + shexFileUrl);
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    if (shapemappath == null) {
        // same fallback for the shape map
        try {
            URL shex_map_url = new URL(goshapemapFileUrl);
            shapemappath = "./go-cam-shapes.shapeMap";
            File shex_map_file = new File(shapemappath);
            org.apache.commons.io.FileUtils.copyURLToFile(shex_map_url, shex_map_file);
            System.err.println("-m .No shape map file provided, using: " + goshapemapFileUrl);
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    LOGGER.info("making shex validator: " + shexpath + " " + shapemappath + " " + curieHandler + " ");
    MinervaShexValidator shex;
    try {
        shex = new MinervaShexValidator(shexpath, shapemappath, curieHandler, m3.getGolego_repo());
    } catch (Exception e1) {
        // nothing below works without the validator object; fail with the cause instead of a later NullPointerException
        throw new IOException("unable to create shex validator from schema " + shexpath + " and shape map " + shapemappath, e1);
    }
    shex.setActive(checkShex);
    // shex validator is ready, now build the inference provider (which provides access to the shex validator and provides inferences useful for shex)
    String reasonerOpt = "arachne";
    LOGGER.info("Building OWL inference provider: " + reasonerOpt);
    InferenceProviderCreator ipc = StartUpTool.createInferenceProviderCreator(reasonerOpt, m3, shex);
    LOGGER.info("Validating models: " + reasonerOpt);
    // Set up all the report files (truncate and write the header rows).
    String basic_output_file = outputFolder + "main_report.txt";
    String explanations_file = outputFolder + "explanations.txt";
    String activity_output_file = outputFolder + "activity_report.txt";
    try {
        // valid or not
        try (FileWriter basic_shex_output = new FileWriter(basic_output_file, false)) {
            basic_shex_output.write("filename\tmodel_title\tmodel_url\tmodelstate\tcontributor\tprovider\tdate\tOWL_consistent\tshex_valid\tshex_meta_problem\tshex_data_problem\tvalidation_time_milliseconds\taxioms\tn_rows_gpad\t");
            basic_shex_output.write(GoCamModelStats.statsHeader() + "\n");
        }
        // tab delimited explanations for failures
        try (FileWriter explanations = new FileWriter(explanations_file, false)) {
            explanations.write("filename\tmodel_title\tmodel_iri\tnode\tNode_types\tproperty\tIntended_range_shapes\tobject\tObject_types\tObject_shapes\n");
        }
        // tab delimited summary of properties of activity units
        try (FileWriter activity_output = new FileWriter(activity_output_file, false)) {
            activity_output.write("filename\tmodel_title\tmodel_url\tmodelstate\tcontributor\tprovider\tdate\tactivity_iri\tactivity_xref\tactivity_label\tcomplete\tinputs\toutputs\tenablers\tlocations\tcausal upstream\tcausal downstream\tpart of n BP\tMF\tBP\n");
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    // this will generate the json file used for the go rules report for the pipeline
    Set<ErrorMessage> owl_errors = new HashSet<ErrorMessage>();
    Set<ErrorMessage> shex_errors = new HashSet<ErrorMessage>();
    BatchPipelineValidationReport pipe_report = new BatchPipelineValidationReport();
    try {
        pipe_report.setNumber_of_models(m3.getAvailableModelIds().size());
    } catch (IOException e1) {
        e1.printStackTrace();
    }
    int bad_models = 0;
    int good_models = 0;
    // only used if OWL reasoning report is requested
    ReasonerReport reasoner_report = null;
    if (run_reasoner_report) {
        reasoner_report = initReasonerReport(outputFolder);
    }
    // now process each gocam
    try {
        for (IRI modelIRI : m3.getAvailableModelIds()) {
            long start = System.currentTimeMillis();
            // null when the model came from an existing journal or a single input file
            String filename = modelid_filename.get(modelIRI.toString());
            // shex
            boolean isConformant = true;
            if (filename != null) {
                LOGGER.info("processing " + filename + "\t" + modelIRI);
            } else {
                LOGGER.info("processing \t" + modelIRI);
            }
            // this is where everything actually happens
            ModelContainer mc = m3.getModel(modelIRI);
            OWLOntology gocam = mc.getAboxOntology();
            try {
                // if a model does not have an import statement that links in an ontology that defines all of its classes and object properties
                // or if the model does not define the classes and object properties itself, parsing problems will prevail
                // this step makes sure that does not happen
                gocam = CoreMolecularModelManager.fixBrokenObjectPropertiesAndAxioms(gocam);
            } catch (OWLOntologyCreationException e) {
                e.printStackTrace();
            }
            LOGGER.info("preparing model stats...");
            // The GoCamModel code is used to capture model-level statistics such as 'how many causal relations are there?'
            // This might be an area for a speed improvement if needed
            GoCamModel gcm = new GoCamModel(gocam, m3);
            String title = "title";
            if (gcm.getTitle() != null) {
                title = makeColSafe(gcm.getTitle());
            } else {
                LOGGER.error("no title for " + filename);
            }
            // this is to make clickable links in reports
            String link = modelIRI.toString().replace("http://model.geneontology.org/", "http://noctua.geneontology.org/editor/graph/gomodel:");
            if (modelIRI.toString().contains("R-HSA")) {
                link = link.replace("noctua.geneontology", "noctua-dev.berkeleybop");
            }
            String modelstate = makeColSafe(gcm.getModelstate());
            String contributor = makeColSafe(gcm.getContributors().toString());
            String date = makeColSafe(gcm.getDate());
            String provider = makeColSafe(gcm.getProvided_by().toString());
            pipe_report.setTaxa(gcm.getIn_taxon());
            LOGGER.info("model stats done for title: " + title);
            int axioms = gocam.getAxiomCount();
            // add activity level statistics as a default
            try (FileWriter activity_output = new FileWriter(activity_output_file, true)) {
                for (ActivityUnit unit : gcm.getActivities()) {
                    activity_output.write(filename + "\t" + title + "\t" + link + "\t" + modelstate + "\t" + contributor + "\t" + provider + "\t" + date + "\t" + unit.getIndividual().getIRI().toString() + "\t" + unit.getXref() + "\t" + unit.getLabel() + "\t");
                    activity_output.write(unit.isComplete() + "\t" + unit.getInputs().size() + "\t" + unit.getOutputs().size() + "\t" + unit.getEnablers().size() + "\t" + unit.getLocations().size() + "\t" + unit.getCausal_in().size() + "\t" + unit.getCausal_out().size() + "\t" + unit.getContaining_processes().size() + "\t" + unit.stringForClasses(unit.getDirect_types()) + "\t" + unit.getURIsForConnectedBPs() + "\n");
                }
            }
            InferenceProvider ip = ipc.create(mc);
            // OWL
            final boolean isConsistent = ip.isConsistent();
            // TODO re-use reasoner object from ip
            // TODO this is another area that could be touched/removed for speed improvement
            int n_rows_gpad = 0;
            if (isConsistent) {
                try {
                    Set<GPADData> gpad = new GPADSPARQLExport(curieHandler, m3.getLegacyRelationShorthandIndex(), m3.getTboxShorthandIndex(), m3.getGolego_repo().regulatorsToRegulated).getGPAD(m3.createInferredModel(modelIRI), modelIRI);
                    if (gpad != null) {
                        n_rows_gpad = gpad.size();
                    }
                } catch (InconsistentOntologyException e) {
                    LOGGER.error("inconsistent ontology, can't make gpad");
                }
            }
            long done = System.currentTimeMillis();
            long milliseconds = (done - start);
            if (!isConsistent) {
                // for rules report in pipeline
                String level = "ERROR";
                String model_id = curieHandler.getCuri(modelIRI);
                String message = BatchPipelineValidationReport.getOwlMessage();
                int rule = BatchPipelineValidationReport.getOwlRule();
                ErrorMessage owl = new ErrorMessage(level, model_id, gcm.getIn_taxon(), message, rule);
                owl_errors.add(owl);
                try (FileWriter explanations = new FileWriter(explanations_file, true)) {
                    explanations.write(filename + "\t" + title + "\t" + modelIRI + "\tOWL fail explanation: " + ip.getValidation_results().getOwlvalidation().getAsText() + "\n");
                }
                // travis mode causes the system to exit when an invalid model is detected (unless shouldFail is on)
                if (travisMode && !shouldFail) {
                    LOGGER.error(filename + "\t" + title + "\t" + modelIRI + "\tOWL:is inconsistent, quitting");
                    System.exit(-1);
                }
            }
            if (!checkShex) {
                // without shex the OWL result alone decides whether the model counts as good
                if (isConsistent) {
                    good_models++;
                } else {
                    bad_models++;
                }
            } else {
                ValidationResultSet validations = ip.getValidation_results();
                ShexValidationReport shex_report = validations.getShexvalidation();
                isConformant = validations.allConformant();
                if (isConformant) {
                    good_models++;
                } else {
                    bad_models++;
                }
                final boolean shexConformant = shex_report.isConformant();
                if (!shexConformant) {
                    String level = "WARNING";
                    String model_id = curieHandler.getCuri(modelIRI);
                    String message = BatchPipelineValidationReport.getShexMessage();
                    int rule = BatchPipelineValidationReport.getShexRule();
                    ErrorMessage shex_message = new ErrorMessage(level, model_id, gcm.getIn_taxon(), message, rule);
                    // TODO set as a parameter
                    boolean include_explanations_in_json = true;
                    if (include_explanations_in_json) {
                        shex_message.setExplanations(validations);
                    }
                    shex_errors.add(shex_message);
                    try (FileWriter explanations = new FileWriter(explanations_file, true)) {
                        explanations.write(shex_report.getAsTab(filename + "\t" + title + "\t" + modelIRI));
                    }
                }
                if (travisMode) {
                    if (!isConformant && !shouldFail) {
                        LOGGER.error(filename + "\t" + title + "\t" + modelIRI + "\tshex is nonconformant, quitting, explanation:\n" + shex_report.getAsText());
                        System.exit(-1);
                    } else if (isConformant && shouldFail) {
                        LOGGER.error(filename + "\t" + title + "\t" + modelIRI + "\tshex validates, but it should not be, quitting");
                        System.exit(-1);
                    }
                }
                // is it a metadata violation or data ?
                // a violation on the model node itself counts as metadata, anything else as data
                boolean shex_meta_problem = false;
                boolean shex_data_problem = false;
                if (!shexConformant) {
                    String model_curie = curieHandler.getCuri(modelIRI);
                    Set<Violation> violations = shex_report.getViolations();
                    if (violations != null) {
                        for (Violation v : violations) {
                            if (v.getNode().equals(model_curie)) {
                                shex_meta_problem = true;
                            } else {
                                shex_data_problem = true;
                            }
                        }
                    } else {
                        LOGGER.error("Invalid model but no violations reported");
                    }
                }
                LOGGER.info(filename + "\t" + title + "\t" + modelIRI + "\tOWL:" + isConsistent + "\tshex:" + isConformant);
                // basic is just one row per model - did it validate or not
                try (FileWriter basic = new FileWriter(basic_output_file, true)) {
                    basic.write(filename + "\t" + title + "\t" + link + "\t" + modelstate + "\t" + contributor + "\t" + provider + "\t" + date + "\t" + isConsistent + "\t" + isConformant + "\t" + shex_meta_problem + "\t" + shex_data_problem + "\t" + milliseconds + "\t" + axioms + "\t" + n_rows_gpad + "\t" + gcm.getGoCamModelStats().stats2cols() + "\n");
                }
            }
            if (run_reasoner_report) {
                addReasonerReport(outputFolder, gocam, ip, title, reasoner_report);
            }
        }
    } catch (Exception e) {
        // a failure stops the loop, the reports are still written for the models processed so far
        e.printStackTrace();
    }
    if (run_reasoner_report) {
        summarizeReasonerReport(outputFolder, reasoner_report);
    }
    pipe_report.setNumber_of_correct_models(good_models);
    pipe_report.setNumber_of_models_in_error(bad_models);
    pipe_report.getMessages().put(BatchPipelineValidationReport.getShexRuleString(), shex_errors);
    pipe_report.getMessages().put(BatchPipelineValidationReport.getOwlRuleString(), owl_errors);
    GsonBuilder builder = new GsonBuilder();
    Gson gson = builder.setPrettyPrinting().create();
    String json = gson.toJson(pipe_report);
    try (FileWriter pipe_json = new FileWriter(outputFolder + "gorules_report.json", false)) {
        pipe_json.write(json);
    } catch (IOException e) {
        e.printStackTrace();
    }
    m3.dispose();
    LOGGER.info("done with validation");
}
Aggregations