use of uk.ac.ebi.spot.goci.model.RiskAllele in project goci by EBISPOT.
the class DefaultGWASOWLConverter method convertAssociation.
/**
 * Converts a single GWAS {@link Association} into OWL axioms and adds them to the given ontology.
 *
 * <p>The method mints an IRI for the association, asserts the resulting individual as a member of the
 * trait-association class and, when both the p-value mantissa and exponent are present, attaches the
 * p-value as a data property. Then, for every locus of the association, it:
 * <ul>
 *   <li>links the association to the SNP of each strongest risk allele (has_subject / is_subject_of),</li>
 *   <li>links the association to each of its EFO traits (has_object / is_object_of) and records the
 *       curated GWAS trait name on the association individual,</li>
 *   <li>asserts an rdfs:label of the form "Association between &lt;rsId&gt; and &lt;trait&gt;".</li>
 * </ul>
 *
 * @param association    the association to convert
 * @param ontology       the ontology that receives the generated axioms
 * @param issuedWarnings warning messages that have already been logged; consulted so the same warning is
 *                       not logged twice, and updated in place when a new warning is logged
 */
protected void convertAssociation(Association association, OWLOntology ontology, Set<String> issuedWarnings) {
// get the trait association class
OWLClass taClass = getDataFactory().getOWLClass(IRI.create(OntologyConstants.TRAIT_ASSOCIATION_CLASS_IRI));
IRI taIndIRI = getMinter().mint(OntologyConstants.GWAS_ONTOLOGY_BASE_IRI, association);
// create a new trait association instance
OWLNamedIndividual taIndiv = getDataFactory().getOWLNamedIndividual(taIndIRI);
// assert class membership
OWLClassAssertionAxiom classAssertion = getDataFactory().getOWLClassAssertionAxiom(taClass, taIndiv);
getManager().addAxiom(ontology, classAssertion);
// get datatype relations
OWLDataProperty has_p_value = getDataFactory().getOWLDataProperty(IRI.create(OntologyConstants.HAS_P_VALUE_PROPERTY_IRI));
// get annotation relations
OWLAnnotationProperty rdfsLabel = getDataFactory().getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI());
// Some associations carry no exact p-value (e.g. it was only reported as being below 10-6).
// If either the mantissa or the exponent is missing, no p-value axiom is added.
if (association.getPvalueMantissa() != null && association.getPvalueExponent() != null) {
// p-value = mantissa * 10^exponent, stored as a double literal
double pval = association.getPvalueMantissa() * Math.pow(10, association.getPvalueExponent());
OWLLiteral pValue = getDataFactory().getOWLLiteral(pval);
// alternative kept from an earlier revision: store the p-value as a "<mantissa>e<exponent>" string literal
// OWLLiteral pValue = getDataFactory().getOWLLiteral(association.getPvalueMantissa()+"e"+association.getPvalueExponent());
OWLDataPropertyAssertionAxiom p_value_relation = getDataFactory().getOWLDataPropertyAssertionAxiom(has_p_value, taIndiv, pValue);
AddAxiom add_p_value = new AddAxiom(ontology, p_value_relation);
getManager().applyChange(add_p_value);
}
// get the snp instance for this association
OWLNamedIndividual snpIndiv;
// rsId is overwritten for every risk allele visited, so the label asserted at the end of each locus
// iteration uses the rs ID of the most recently visited SNP (or null if none has been visited yet)
String rsId = null;
for (Locus locus : association.getLoci()) {
for (RiskAllele riskAllele : locus.getStrongestRiskAlleles()) {
SingleNucleotidePolymorphism snp = riskAllele.getSnp();
rsId = snp.getRsId();
snpIndiv = getDataFactory().getOWLNamedIndividual(getMinter().mint(OntologyConstants.GWAS_ONTOLOGY_BASE_IRI, snp));
// NOTE(review): this fallback only runs if the data factory returns null; OWL API data factories are
// not expected to do that, so the branch looks unreachable — confirm before relying on it
if (snpIndiv == null) {
String warning = "A new SNP with the given RSID only will be created";
// log each distinct warning only once per conversion run
if (!issuedWarnings.contains(warning)) {
getLog().warn(warning);
issuedWarnings.add(warning);
}
snpIndiv = getDataFactory().getOWLNamedIndividual(getMinter().mint(OntologyConstants.GWAS_ONTOLOGY_BASE_IRI, "SingleNucleotidePolymorphism", snp.getRsId(), true));
// assert class membership
OWLClass snpClass = getDataFactory().getOWLClass(IRI.create(OntologyConstants.SNP_CLASS_IRI));
OWLClassAssertionAxiom snpClassAssertion = getDataFactory().getOWLClassAssertionAxiom(snpClass, snpIndiv);
getManager().addAxiom(ontology, snpClassAssertion);
// assert rsid relation
OWLDataProperty has_snp_rsid = getDataFactory().getOWLDataProperty(IRI.create(OntologyConstants.HAS_SNP_REFERENCE_ID_PROPERTY_IRI));
OWLLiteral rsid = getDataFactory().getOWLLiteral(snp.getRsId());
OWLDataPropertyAssertionAxiom rsid_relation = getDataFactory().getOWLDataPropertyAssertionAxiom(has_snp_rsid, snpIndiv, rsid);
AddAxiom add_rsid = new AddAxiom(ontology, rsid_relation);
getManager().applyChange(add_rsid);
// assert label
OWLAnnotationAssertionAxiom snp_label_annotation = getDataFactory().getOWLAnnotationAssertionAxiom(rdfsLabel, snpIndiv.getIRI(), rsid);
AddAxiom add_snp_label = new AddAxiom(ontology, snp_label_annotation);
getManager().applyChange(add_snp_label);
}
// get object properties
OWLObjectProperty has_subject = getDataFactory().getOWLObjectProperty(IRI.create(OntologyConstants.HAS_SUBJECT_IRI));
OWLObjectProperty is_subject_of = getDataFactory().getOWLObjectProperty(IRI.create(OntologyConstants.IS_SUBJECT_OF_IRI));
// assert relations in both directions: association -> SNP and SNP -> association
OWLObjectPropertyAssertionAxiom has_subject_snp_relation = getDataFactory().getOWLObjectPropertyAssertionAxiom(has_subject, taIndiv, snpIndiv);
AddAxiom add_has_subject_snp = new AddAxiom(ontology, has_subject_snp_relation);
getManager().applyChange(add_has_subject_snp);
OWLObjectPropertyAssertionAxiom is_subject_of_snp_relation = getDataFactory().getOWLObjectPropertyAssertionAxiom(is_subject_of, snpIndiv, taIndiv);
AddAxiom add_is_subject_of_snp = new AddAxiom(ontology, is_subject_of_snp_relation);
getManager().applyChange(add_is_subject_of_snp);
}
// get the EFO class for the trait
// NOTE(review): this loop and the label below sit inside the per-locus loop, so they run once per locus
for (EfoTrait efoTrait : association.getEfoTraits()) {
OWLClass traitClass;
traitClass = getDataFactory().getOWLClass(IRI.create(efoTrait.getUri()));
// NOTE(review): as with the SNP lookup above, this fallback depends on the data factory returning
// null, which OWL API data factories are not expected to do — confirm whether it can ever run
if (traitClass == null) {
String warning = "This trait will be mapped to Experimental Factor";
if (!issuedWarnings.contains(warning)) {
getLog().warn(warning);
issuedWarnings.add(warning);
}
traitClass = getDataFactory().getOWLClass(IRI.create(OntologyConstants.EXPERIMENTAL_FACTOR_CLASS_IRI));
}
// create a new trait instance (puns the class)
IRI traitIRI = traitClass.getIRI();
OWLNamedIndividual traitIndiv = getDataFactory().getOWLNamedIndividual(traitIRI);
// trace-only bookkeeping: both branches just log, the individual is asserted below either way
if (ontology.containsIndividualInSignature(traitIRI)) {
getLog().trace("Trait individual '" + traitIRI.toString() + "' (type: " + traitClass + ") already exists");
} else {
getLog().trace("Creating trait individual '" + traitIRI.toString() + "' (type: " + traitClass + ")");
}
// and also add the gwas label to the individual so we don't lose curated data
// (the name comes from the study's disease trait and is attached to the association individual)
OWLDataProperty has_gwas_trait_name = getDataFactory().getOWLDataProperty(IRI.create(OntologyConstants.HAS_GWAS_TRAIT_NAME_PROPERTY_IRI));
OWLLiteral gwasTrait = getDataFactory().getOWLLiteral(association.getStudy().getDiseaseTrait().getTrait());
OWLDataPropertyAssertionAxiom gwas_trait_relation = getDataFactory().getOWLDataPropertyAssertionAxiom(has_gwas_trait_name, taIndiv, gwasTrait);
AddAxiom add_gwas_trait_name = new AddAxiom(ontology, gwas_trait_relation);
getManager().applyChange(add_gwas_trait_name);
// assert class membership
OWLClassAssertionAxiom traitClassAssertion = getDataFactory().getOWLClassAssertionAxiom(traitClass, traitIndiv);
getManager().addAxiom(ontology, traitClassAssertion);
// get object properties
OWLObjectProperty has_object = getDataFactory().getOWLObjectProperty(IRI.create(OntologyConstants.HAS_OBJECT_IRI));
OWLObjectProperty is_object_of = getDataFactory().getOWLObjectProperty(IRI.create(OntologyConstants.IS_OBJECT_OF_IRI));
// assert relations in both directions: association -> trait and trait -> association
OWLObjectPropertyAssertionAxiom has_object_trait_relation = getDataFactory().getOWLObjectPropertyAssertionAxiom(has_object, taIndiv, traitIndiv);
AddAxiom add_has_object_trait = new AddAxiom(ontology, has_object_trait_relation);
getManager().applyChange(add_has_object_trait);
OWLObjectPropertyAssertionAxiom is_object_of_trait_relation = getDataFactory().getOWLObjectPropertyAssertionAxiom(is_object_of, traitIndiv, taIndiv);
AddAxiom add_is_object_of_trait = new AddAxiom(ontology, is_object_of_trait_relation);
getManager().applyChange(add_is_object_of_trait);
}
// finally, assert label for this association
// (built from the most recently visited rs ID and the study's disease trait name)
OWLLiteral label = getDataFactory().getOWLLiteral("Association between " + rsId + " and " + association.getStudy().getDiseaseTrait().getTrait());
OWLAnnotationAssertionAxiom label_annotation = getDataFactory().getOWLAnnotationAssertionAxiom(rdfsLabel, taIndiv.getIRI(), label);
AddAxiom add_band_label = new AddAxiom(ontology, label_annotation);
getManager().applyChange(add_band_label);
}
}
use of uk.ac.ebi.spot.goci.model.RiskAllele in project goci by EBISPOT.
the class DefaultGWASOWLPublisher method validateGWASData.
/**
 * Validates the data obtained from the GWAS catalog (prior to converting to OWL).
 *
 * <p>Every study in the collection is inspected. Studies without associations are counted as
 * ineligible; for all other studies each SNP/trait pairing is written to the debug log and the
 * study is counted as mappable. A summary report is logged at info level once all studies have
 * been visited. Nothing is removed from {@code studies}.
 *
 * @param studies the set of studies to validate
 */
protected void validateGWASData(Collection<Study> studies) {
    int count = 0;
    int noAssocCount = 0;
    // No check in this method increments this counter; it is kept so the report layout is unchanged.
    int termMismatches = 0;
    for (Study study : studies) {
        Collection<Association> associations = study.getAssociations();
        getLog().debug("Study (PubMed ID '" + study.getPubmedId() + "') had " + associations.size() + " associations");
        if (associations.isEmpty()) {
            noAssocCount++;
            continue;
        }
        for (Association association : associations) {
            // Build "trait1, trait2, ..." for the log message. String.concat returns a NEW string,
            // so the previous code (which ignored the return value) always logged an empty list.
            StringBuilder efoTraitList = new StringBuilder();
            for (EfoTrait efoTrait : association.getEfoTraits()) {
                if (efoTraitList.length() > 0) {
                    efoTraitList.append(", ");
                }
                efoTraitList.append(efoTrait.getTrait());
            }
            for (Locus locus : association.getLoci()) {
                for (RiskAllele riskAllele : locus.getStrongestRiskAlleles()) {
                    getLog().debug("    Association: SNP '" + riskAllele.getSnp().getRsId() + "' <-> Trait '" + efoTraitList + "'");
                }
            }
        }
        count++;
    }
    int eligCount = studies.size() - noAssocCount;
    int correctCount = count + termMismatches;
    getLog().info("\n\nREPORT:\n" + eligCount + "/" + studies.size() + " declared associations and therefore could usefully be mapped.\n" + (eligCount - count - termMismatches) + "/" + eligCount + " failed due to data integrity concerns.\n" + count + "/" + correctCount + " studies could be completely mapped after passing all checks.\n" + termMismatches + "/" + correctCount + " failed due to missing or duplicated terms in EFO");
}
use of uk.ac.ebi.spot.goci.model.RiskAllele in project goci by EBISPOT.
the class DefaultGWASOWLPublisher method filterAndPublishGWASData.
/**
 * Applies the configured study limit, date filter and p-value filter to the given studies and
 * converts whatever qualifies (SNPs, then trait associations, then studies) into the supplied ontology.
 *
 * <p>A study qualifies when no date filter is set or its publication date is before the filter date.
 * Within a qualifying study, an association qualifies when no p-value filter is set or its p-value is
 * strictly smaller than the filter threshold. When a p-value filter is set, associations that have no
 * p-value (missing mantissa or exponent) cannot be compared and are therefore excluded instead of
 * failing with a {@link NullPointerException}.
 *
 * @param conversion the ontology that receives the converted data
 * @param studies    the candidate studies
 * @return the same {@code conversion} ontology instance, after the qualifying data has been added
 * @throws OWLConversionException declared by this method's signature
 */
private OWLOntology filterAndPublishGWASData(OWLOntology conversion, Collection<Study> studies) throws OWLConversionException {
    // TODO : check with tony : Discard studies which are not yet associated with a trait.
    // Studies without a disease trait, without a catalog publish date, or with a catalog unpublish date
    // are currently NOT discarded here (that filtering was disabled).
    Collection<Study> filteredStudies = new ArrayList<Study>();
    Collection<Association> filteredTraitAssociations = new ArrayList<Association>();
    Collection<SingleNucleotidePolymorphism> filteredSNPs = new ArrayList<SingleNucleotidePolymorphism>();
    int count = 0;
    int studyLimit = getStudiesLimit() == -1 ? Integer.MAX_VALUE : getStudiesLimit();

    // The threshold does not depend on the study or association, so compute it once. It is kept as a
    // double: narrowing to float flushes p-values below ~1e-45 to zero and breaks the comparison.
    boolean pvalueFilterActive = FilterProperties.getPvalueFilter() != null;
    double pvalueThreshold = pvalueFilterActive
            ? FilterProperties.getPvalueMant() * Math.pow(10, FilterProperties.getPvalueExp())
            : 0;

    Iterator<Study> studyIterator = studies.iterator();
    while (count < studyLimit && studyIterator.hasNext()) {
        Study nextStudy = studyIterator.next();
        // only process a study if no date filter has been provided or if the study's publication date
        // is earlier than the filter date
        if (FilterProperties.getDateFilter() == null || nextStudy.getPublicationDate().before(FilterProperties.getDateFilter())) {
            getLog().debug("Qualifying study");
            for (Association nextTA : nextStudy.getAssociations()) {
                if (pvalueFilterActive) {
                    if (nextTA.getPvalueMantissa() == null || nextTA.getPvalueExponent() == null) {
                        // no p-value to compare against the threshold, so the association cannot qualify
                        getLog().debug("Skipping association without a p-value");
                        continue;
                    }
                    double pval = nextTA.getPvalueMantissa() * Math.pow(10, nextTA.getPvalueExponent());
                    getLog().debug("Your comparators are " + pvalueThreshold + " and " + pval);
                    boolean belowThreshold = pval < pvalueThreshold;
                    if (!belowThreshold) {
                        continue;
                    }
                }
                getLog().debug("Qualifying association");
                filteredTraitAssociations.add(nextTA);
                for (Locus locus : nextTA.getLoci()) {
                    for (RiskAllele riskAllele : locus.getStrongestRiskAlleles()) {
                        filteredSNPs.add(riskAllele.getSnp());
                    }
                }
            }
            // the study itself is kept (and counted against the limit) even if none of its
            // associations passed the p-value filter
            filteredStudies.add(nextStudy);
            count++;
        }
    }
    // convert this data, starting with SNPs (no dependencies) and working up to studies
    getLog().debug("Starting conversion to OWL...");
    getLog().debug("Converting " + filteredSNPs.size() + " filtered SNPs...");
    getConverter().addSNPsToOntology(filteredSNPs, conversion);
    getLog().debug("Converting " + filteredTraitAssociations.size() + " filtered Trait Associations...");
    getConverter().addAssociationsToOntology(filteredTraitAssociations, conversion);
    getLog().debug("Converting " + filteredStudies.size() + " filtered Studies...");
    getConverter().addStudiesToOntology(filteredStudies, conversion);
    getLog().debug("All conversion done!");
    return conversion;
}
use of uk.ac.ebi.spot.goci.model.RiskAllele in project goci by EBISPOT.
the class AssociationService method loadAssociatedData.
/**
 * Walks the object graph reachable from an association (study, loci, risk-allele SNPs, their
 * locations/regions and genomic contexts, mapped genes with their Entrez/Ensembl IDs, and author
 * reported genes) and logs a trace-level summary of what was found.
 *
 * <p>Nothing is returned and the collected values are only used for the log message; presumably the
 * real purpose is to touch lazily loaded relations so they are initialised — confirm against the
 * persistence configuration.
 *
 * @param association the association whose related data should be traversed
 */
public void loadAssociatedData(Association association) {
    int traitCount = association.getEfoTraits().size();
    Study study = studyService.fetchOne(association.getStudy());
    AtomicInteger reportedGeneCount = new AtomicInteger();
    Collection<SingleNucleotidePolymorphism> snps = new HashSet<>();
    Collection<Region> regions = new HashSet<>();
    Collection<Gene> mappedGenes = new HashSet<>();
    // gene name -> external gene identifiers seen for that gene
    Map<String, Set<String>> mappedGeneEntrezIds = new HashMap<>();
    Map<String, Set<String>> mappedGeneEnsemblIds = new HashMap<>();
    association.getLoci().forEach(locus -> {
        locus.getStrongestRiskAlleles().stream().map(RiskAllele::getSnp).forEach(snp -> {
            for (Location location : snp.getLocations()) {
                regions.add(location.getRegion());
            }
            snp.getGenomicContexts().forEach(context -> {
                Gene gene = context.getGene();
                mappedGenes.add(gene);
                String geneName = gene.getGeneName();

                Set<String> entrezIds = mappedGeneEntrezIds.computeIfAbsent(geneName, name -> new HashSet<>());
                for (EntrezGene entrezGene : gene.getEntrezGeneIds()) {
                    entrezIds.add(entrezGene.getEntrezGeneId());
                }

                // Ensembl IDs go into the Ensembl map; previously a new set was stored in the Entrez
                // map by mistake, overwriting the Entrez IDs and leaving the Ensembl map empty.
                Set<String> ensemblIds = mappedGeneEnsemblIds.computeIfAbsent(geneName, name -> new HashSet<>());
                for (EnsemblGene ensemblGene : gene.getEnsemblGeneIds()) {
                    ensemblIds.add(ensemblGene.getEnsemblGeneId());
                }
            });
            // every risk-allele SNP of the locus ends up in the set here, so no second pass is needed
            snps.add(snp);
        });
        reportedGeneCount.addAndGet(locus.getAuthorReportedGenes().size());
        locus.getAuthorReportedGenes().forEach(authorReportedGene -> {
            // results intentionally unused: the calls only touch the ID collections
            // (presumably to force lazy initialisation — confirm)
            authorReportedGene.getEnsemblGeneIds().size();
            authorReportedGene.getEntrezGeneIds().size();
        });
    });
    getLog().trace("Association '" + association.getId() + "' is mapped to " + "" + traitCount + " EFO traits where study id = " + study.getId() + " " + "(author reported " + reportedGeneCount + " gene(s)); " + "this reports on " + snps.size() + " SNPs in " + regions.size() + " regions, " + "mapped to " + mappedGenes.size() + " genes.");
}
use of uk.ac.ebi.spot.goci.model.RiskAllele in project goci by EBISPOT.
the class SingleSnpMultiSnpAssociationService method createForm.
/**
 * Builds the form that is handed to the view for displaying/editing an association.
 *
 * <p>Scalar attributes and EFO traits are copied straight from the association. Author reported genes
 * and risk alleles are gathered from all loci (risk alleles ordered by ID within each locus); for every
 * risk allele a SNP row, its location mapping forms, its proxy SNP rs IDs and the genomic contexts of
 * its SNP are added to the form.
 *
 * @param association the association to represent as a form
 * @return a populated {@link SnpAssociationStandardMultiForm}
 */
@Override
public SnpAssociationForm createForm(Association association) {
    SnpAssociationStandardMultiForm multiForm = new SnpAssociationStandardMultiForm();

    // Identifier
    multiForm.setAssociationId(association.getId());

    // Plain string / numeric attributes
    multiForm.setRiskFrequency(association.getRiskFrequency());
    multiForm.setPvalueDescription(association.getPvalueDescription());
    multiForm.setSnpType(association.getSnpType());
    multiForm.setMultiSnpHaplotype(association.getMultiSnpHaplotype());
    multiForm.setSnpApproved(association.getSnpApproved());
    multiForm.setPvalueMantissa(association.getPvalueMantissa());
    multiForm.setPvalueExponent(association.getPvalueExponent());
    multiForm.setStandardError(association.getStandardError());
    multiForm.setRange(association.getRange());
    multiForm.setDescription(association.getDescription());

    // Odds ratio / beta values
    multiForm.setOrPerCopyNum(association.getOrPerCopyNum());
    multiForm.setOrPerCopyRecip(association.getOrPerCopyRecip());
    multiForm.setOrPerCopyRecipRange(association.getOrPerCopyRecipRange());
    multiForm.setBetaNum(association.getBetaNum());
    multiForm.setBetaUnit(association.getBetaUnit());
    multiForm.setBetaDirection(association.getBetaDirection());

    // EFO traits
    multiForm.setEfoTraits(association.getEfoTraits());

    // Gather genes and risk alleles across loci. Standard and multi-SNP associations are assumed to
    // have a single locus, which is why the haplotype fields are simply set from inside the loop.
    Collection<Gene> reportedGenes = new ArrayList<>();
    Collection<RiskAllele> orderedRiskAlleles = new ArrayList<RiskAllele>();
    for (Locus currentLocus : association.getLoci()) {
        reportedGenes.addAll(currentLocus.getAuthorReportedGenes());
        List<RiskAllele> sortedById = currentLocus.getStrongestRiskAlleles()
                .stream()
                .sorted((left, right) -> Long.compare(left.getId(), right.getId()))
                .collect(Collectors.toList());
        orderedRiskAlleles.addAll(sortedById);
        multiForm.setMultiSnpHaplotypeNum(currentLocus.getHaplotypeSnpCount());
        multiForm.setMultiSnpHaplotypeDescr(currentLocus.getDescription());
    }

    // Only the gene names go onto the form
    Collection<String> reportedGeneNames = reportedGenes.stream()
            .map(Gene::getGeneName)
            .collect(Collectors.toCollection(ArrayList::new));
    multiForm.setAuthorReportedGenes(reportedGeneNames);

    // One form row per risk allele, plus the mapping forms and genomic contexts of its SNP
    Collection<GenomicContext> contexts = new ArrayList<GenomicContext>();
    Collection<SingleNucleotidePolymorphism> alleleSnps = new ArrayList<>();
    List<SnpFormRow> rows = new ArrayList<SnpFormRow>();
    List<SnpMappingForm> mappingForms = new ArrayList<SnpMappingForm>();
    for (RiskAllele allele : orderedRiskAlleles) {
        SnpFormRow row = new SnpFormRow();
        row.setStrongestRiskAllele(allele.getRiskAlleleName());

        SingleNucleotidePolymorphism alleleSnp = allele.getSnp();
        alleleSnps.add(alleleSnp);
        String rsIdentifier = alleleSnp.getRsId();
        row.setSnp(rsIdentifier);

        alleleSnp.getLocations()
                .forEach(snpLocation -> mappingForms.add(new SnpMappingForm(rsIdentifier, snpLocation)));

        // Proxy SNPs are optional; the row gets an empty collection when there are none
        Collection<String> proxyRsIds = new ArrayList<>();
        Collection<SingleNucleotidePolymorphism> proxies = allele.getProxySnps();
        if (proxies != null) {
            proxies.forEach(proxy -> proxyRsIds.add(proxy.getRsId()));
        }
        row.setProxySnps(proxyRsIds);

        contexts.addAll(genomicContextRepository.findBySnpId(alleleSnp.getId()));
        rows.add(row);
    }

    multiForm.setSnpMappingForms(mappingForms);
    multiForm.setGenomicContexts(contexts);
    multiForm.setSnps(alleleSnps);
    multiForm.setSnpFormRows(rows);
    return multiForm;
}
Aggregations