use of uk.ac.ebi.spot.goci.model.Locus in project goci by EBISPOT.
the class AssociationRowProcessor method createLoci.
/**
 * Builds the loci described by one uploaded association row.
 * <p>
 * For SNP interaction rows one locus is created per risk allele; risk alleles and author
 * reported genes are both split on {@code "x"} and paired up by position. For all other rows
 * (standard and multi-SNP haplotype) a single locus holding every risk allele is created,
 * with risk alleles split on {@code ";"}.
 *
 * @param row               uploaded row supplying alleles, SNPs, proxies, frequencies and genes
 * @param snpInteraction    true when the row describes a SNP x SNP interaction
 * @param multiSnpHaplotype true when the row describes a multi-SNP haplotype; only consulted
 *                          when {@code snpInteraction} is false
 * @return the loci created for the row
 */
private Collection<Locus> createLoci(AssociationUploadRow row, Boolean snpInteraction, Boolean multiSnpHaplotype) {
    Collection<Locus> loci = new ArrayList<>();
    String delimiter;

    if (snpInteraction) {
        delimiter = "x";

        // For SNP interaction studies we need to create a locus per risk allele
        Collection<RiskAllele> locusRiskAlleles = createLocusRiskAlleles(row.getStrongestAllele(), row.getSnp(), row.getProxy(), row.getRiskFrequency(), row.getSnpStatus(), delimiter);

        // Genes for each interaction should be separated by 'x'. The gene column is optional
        // (the non-interaction branch below already treats it that way), so a missing value
        // yields no genes instead of a NullPointerException.
        String authorReportedGene = row.getAuthorReportedGene();
        String[] separatedGenes = authorReportedGene == null ? new String[0] : authorReportedGene.split(delimiter);

        int geneIndex = 0;
        for (RiskAllele riskAllele : locusRiskAlleles) {
            Locus locus = new Locus();

            // Set risk alleles, assume one locus per risk allele
            Collection<RiskAllele> currentLocusRiskAlleles = new ArrayList<>();
            currentLocusRiskAlleles.add(riskAllele);
            locus.setStrongestRiskAlleles(currentLocusRiskAlleles);

            // Set gene; when fewer genes than risk alleles were supplied the remaining loci
            // simply get no author reported genes rather than failing on an invalid index.
            if (geneIndex < separatedGenes.length) {
                String interactionGene = separatedGenes[geneIndex];
                Collection<Gene> locusGenes = associationAttributeService.createLocusGenes(interactionGene, ",");
                locus.setAuthorReportedGenes(locusGenes);
            }
            geneIndex++;

            // Set description
            locus.setDescription("SNP x SNP interaction");
            loci.add(locus);
        }
    } else {
        // Handle multi-snp and standard snp
        delimiter = ";";

        // For multi-snp and standard snps we assume there is only one locus
        Locus locus = new Locus();

        // Handle curator entered genes, for haplotype they are separated by a comma
        if (row.getAuthorReportedGene() != null && !row.getAuthorReportedGene().isEmpty()) {
            Collection<Gene> locusGenes = associationAttributeService.createLocusGenes(row.getAuthorReportedGene(), ",");
            locus.setAuthorReportedGenes(locusGenes);
        }

        // Handle curator entered risk allele
        Collection<RiskAllele> locusRiskAlleles = createLocusRiskAlleles(row.getStrongestAllele(), row.getSnp(), row.getProxy(), row.getRiskFrequency(), row.getSnpStatus(), delimiter);

        if (!multiSnpHaplotype) {
            // For standard associations set the risk allele frequency to the
            // same value as the overall association frequency
            Collection<RiskAllele> locusRiskAllelesWithRiskFrequencyValues = new ArrayList<>();
            for (RiskAllele riskAllele : locusRiskAlleles) {
                riskAllele.setRiskFrequency(row.getAssociationRiskFrequency());
                locusRiskAllelesWithRiskFrequencyValues.add(riskAllele);
            }
            locus.setStrongestRiskAlleles(locusRiskAllelesWithRiskFrequencyValues);
        } else {
            locus.setStrongestRiskAlleles(locusRiskAlleles);
        }

        // Set locus attributes: more than one risk allele means the locus is a haplotype
        Integer haplotypeCount = locusRiskAlleles.size();
        if (haplotypeCount > 1) {
            locus.setHaplotypeSnpCount(haplotypeCount);
            locus.setDescription(haplotypeCount + "-SNP haplotype");
        } else {
            locus.setDescription("Single variant");
        }
        loci.add(locus);
    }
    return loci;
}
use of uk.ac.ebi.spot.goci.model.Locus in project goci by EBISPOT.
the class ValidationChecksBuilder method runLociAttributeChecks.
/**
 * Runs the locus-level checks for an association: author reported genes, risk allele names,
 * and either the SNP/gene location or the SNP on its own.
 * <p>
 * Every distinct author reported gene name is checked first. If none of those checks reports
 * an error, each risk allele's SNP is checked against every gene of its locus; otherwise only
 * the SNP itself is checked.
 *
 * @param association association to be checked
 * @param eRelease    value passed through unchanged to the gene and SNP checks
 * @return the collected errors after being passed through
 *         {@code ErrorProcessingService.checkForValidErrors}
 */
public Collection<ValidationError> runLociAttributeChecks(Association association, String eRelease) {
    Collection<ValidationError> validationErrors = new ArrayList<>();

    // Nothing to inspect when the association carries no loci
    if (association.getLoci() == null) {
        return ErrorProcessingService.checkForValidErrors(validationErrors);
    }

    // Gather the distinct gene names reported across all loci
    Set<String> associationGenes = new HashSet<>();
    for (Locus currentLocus : association.getLoci()) {
        Set<String> namesForLocus = new HashSet<>();
        if (!currentLocus.getAuthorReportedGenes().isEmpty()) {
            namesForLocus = currentLocus.getAuthorReportedGenes()
                    .stream()
                    .map(Gene::getGeneName)
                    .collect(Collectors.toSet());
        }
        associationGenes.addAll(namesForLocus);
    }

    // Check each gene once, keeping only results that actually carry an error
    Collection<ValidationError> geneErrors = new ArrayList<>();
    for (String geneName : associationGenes) {
        getLog().info("Checking gene: ".concat(geneName));
        ValidationError geneCheck = errorCreationService.checkGene(geneName, eRelease);
        if (geneCheck.getError() != null) {
            geneErrors.add(geneCheck);
        }
    }
    validationErrors.addAll(geneErrors);

    // Check risk alleles and their SNPs locus by locus
    for (Locus currentLocus : association.getLoci()) {
        for (RiskAllele allele : currentLocus.getStrongestRiskAlleles()) {
            validationErrors.add(errorCreationService.checkRiskAllele(allele.getRiskAlleleName()));

            if (!geneErrors.isEmpty()) {
                // At least one gene failed its check, so only the SNP itself is checked
                validationErrors.add(errorCreationService.checkSnp(allele.getSnp().getRsId(), eRelease));
                continue;
            }

            // All genes passed: check the SNP against every gene reported for this locus
            Set<String> namesForLocus = currentLocus.getAuthorReportedGenes()
                    .stream()
                    .map(Gene::getGeneName)
                    .collect(Collectors.toSet());
            for (String geneName : namesForLocus) {
                getLog().info("Checking snp/gene location: ".concat(geneName).concat(" ").concat(allele.getSnp().getRsId()));
                validationErrors.add(errorCreationService.checkSnpGeneLocation(allele.getSnp().getRsId(), geneName, eRelease));
            }
        }
    }

    return ErrorProcessingService.checkForValidErrors(validationErrors);
}
use of uk.ac.ebi.spot.goci.model.Locus in project goci by EBISPOT.
the class DefaultGWASOWLConverter method convertAssociation.
/**
 * Adds one association to the ontology as a trait association individual.
 * <p>
 * The method asserts the individual's class membership, its p-value (when both mantissa and
 * exponent are present), subject relations to each risk allele's SNP, object relations to
 * each EFO trait, the GWAS trait name and a human readable label.
 *
 * @param association    the association to convert
 * @param ontology       the ontology that receives the new axioms
 * @param issuedWarnings warnings already logged; used so each distinct warning is logged once
 */
protected void convertAssociation(Association association, OWLOntology ontology, Set<String> issuedWarnings) {
// get the trait association class
OWLClass taClass = getDataFactory().getOWLClass(IRI.create(OntologyConstants.TRAIT_ASSOCIATION_CLASS_IRI));
IRI taIndIRI = getMinter().mint(OntologyConstants.GWAS_ONTOLOGY_BASE_IRI, association);
// create a new trait association instance
OWLNamedIndividual taIndiv = getDataFactory().getOWLNamedIndividual(taIndIRI);
// assert class membership
OWLClassAssertionAxiom classAssertion = getDataFactory().getOWLClassAssertionAxiom(taClass, taIndiv);
getManager().addAxiom(ontology, classAssertion);
// get datatype relations
OWLDataProperty has_p_value = getDataFactory().getOWLDataProperty(IRI.create(OntologyConstants.HAS_P_VALUE_PROPERTY_IRI));
// get annotation relations
OWLAnnotationProperty rdfsLabel = getDataFactory().getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI());
// Some associations have no exact p-value (the source only says it was less than 10-6),
// so the p-value assertion is skipped when either the mantissa or the exponent is missing.
if (association.getPvalueMantissa() != null && association.getPvalueExponent() != null) {
// p-value = mantissa * 10^exponent
double pval = association.getPvalueMantissa() * Math.pow(10, association.getPvalueExponent());
OWLLiteral pValue = getDataFactory().getOWLLiteral(pval);
// OWLLiteral pValue = getDataFactory().getOWLLiteral(association.getPvalueMantissa()+"e"+association.getPvalueExponent());
OWLDataPropertyAssertionAxiom p_value_relation = getDataFactory().getOWLDataPropertyAssertionAxiom(has_p_value, taIndiv, pValue);
AddAxiom add_p_value = new AddAxiom(ontology, p_value_relation);
getManager().applyChange(add_p_value);
}
// get the snp instance for this association
OWLNamedIndividual snpIndiv;
// rsId is overwritten for every risk allele, so the label built further down uses the
// rsId of the most recently visited risk allele (or null if none has been visited yet).
String rsId = null;
// NOTE(review): this per-locus loop is only closed just before the end of the method, so the
// trait relations and the label below are asserted once per locus — confirm this is intended.
for (Locus locus : association.getLoci()) {
for (RiskAllele riskAllele : locus.getStrongestRiskAlleles()) {
SingleNucleotidePolymorphism snp = riskAllele.getSnp();
rsId = snp.getRsId();
snpIndiv = getDataFactory().getOWLNamedIndividual(getMinter().mint(OntologyConstants.GWAS_ONTOLOGY_BASE_IRI, snp));
// Fallback: create a minimal SNP individual identified by its rsId only.
// NOTE(review): presumably the data factory never returns null, which would make this
// branch unreachable — verify against the OWL API contract.
if (snpIndiv == null) {
String warning = "A new SNP with the given RSID only will be created";
if (!issuedWarnings.contains(warning)) {
getLog().warn(warning);
issuedWarnings.add(warning);
}
snpIndiv = getDataFactory().getOWLNamedIndividual(getMinter().mint(OntologyConstants.GWAS_ONTOLOGY_BASE_IRI, "SingleNucleotidePolymorphism", snp.getRsId(), true));
// assert class membership
OWLClass snpClass = getDataFactory().getOWLClass(IRI.create(OntologyConstants.SNP_CLASS_IRI));
OWLClassAssertionAxiom snpClassAssertion = getDataFactory().getOWLClassAssertionAxiom(snpClass, snpIndiv);
getManager().addAxiom(ontology, snpClassAssertion);
// assert rsid relation
OWLDataProperty has_snp_rsid = getDataFactory().getOWLDataProperty(IRI.create(OntologyConstants.HAS_SNP_REFERENCE_ID_PROPERTY_IRI));
OWLLiteral rsid = getDataFactory().getOWLLiteral(snp.getRsId());
OWLDataPropertyAssertionAxiom rsid_relation = getDataFactory().getOWLDataPropertyAssertionAxiom(has_snp_rsid, snpIndiv, rsid);
AddAxiom add_rsid = new AddAxiom(ontology, rsid_relation);
getManager().applyChange(add_rsid);
// assert label
OWLAnnotationAssertionAxiom snp_label_annotation = getDataFactory().getOWLAnnotationAssertionAxiom(rdfsLabel, snpIndiv.getIRI(), rsid);
AddAxiom add_snp_label = new AddAxiom(ontology, snp_label_annotation);
getManager().applyChange(add_snp_label);
}
// get object properties
OWLObjectProperty has_subject = getDataFactory().getOWLObjectProperty(IRI.create(OntologyConstants.HAS_SUBJECT_IRI));
OWLObjectProperty is_subject_of = getDataFactory().getOWLObjectProperty(IRI.create(OntologyConstants.IS_SUBJECT_OF_IRI));
// assert relations in both directions between the association and the SNP
OWLObjectPropertyAssertionAxiom has_subject_snp_relation = getDataFactory().getOWLObjectPropertyAssertionAxiom(has_subject, taIndiv, snpIndiv);
AddAxiom add_has_subject_snp = new AddAxiom(ontology, has_subject_snp_relation);
getManager().applyChange(add_has_subject_snp);
OWLObjectPropertyAssertionAxiom is_subject_of_snp_relation = getDataFactory().getOWLObjectPropertyAssertionAxiom(is_subject_of, snpIndiv, taIndiv);
AddAxiom add_is_subject_of_snp = new AddAxiom(ontology, is_subject_of_snp_relation);
getManager().applyChange(add_is_subject_of_snp);
}
// get the EFO class for the trait
for (EfoTrait efoTrait : association.getEfoTraits()) {
OWLClass traitClass;
traitClass = getDataFactory().getOWLClass(IRI.create(efoTrait.getUri()));
// Fallback: map the trait to the generic experimental factor class.
// NOTE(review): same question as for the SNP fallback above — confirm getOWLClass can return null.
if (traitClass == null) {
String warning = "This trait will be mapped to Experimental Factor";
if (!issuedWarnings.contains(warning)) {
getLog().warn(warning);
issuedWarnings.add(warning);
}
traitClass = getDataFactory().getOWLClass(IRI.create(OntologyConstants.EXPERIMENTAL_FACTOR_CLASS_IRI));
}
// create a new trait instance (puns the class)
IRI traitIRI = traitClass.getIRI();
OWLNamedIndividual traitIndiv = getDataFactory().getOWLNamedIndividual(traitIRI);
// Trace logging only; both branches continue with the same assertions below
if (ontology.containsIndividualInSignature(traitIRI)) {
getLog().trace("Trait individual '" + traitIRI.toString() + "' (type: " + traitClass + ") already exists");
} else {
getLog().trace("Creating trait individual '" + traitIRI.toString() + "' (type: " + traitClass + ")");
}
// and also add the gwas label to the individual so we don't lose curated data
OWLDataProperty has_gwas_trait_name = getDataFactory().getOWLDataProperty(IRI.create(OntologyConstants.HAS_GWAS_TRAIT_NAME_PROPERTY_IRI));
OWLLiteral gwasTrait = getDataFactory().getOWLLiteral(association.getStudy().getDiseaseTrait().getTrait());
OWLDataPropertyAssertionAxiom gwas_trait_relation = getDataFactory().getOWLDataPropertyAssertionAxiom(has_gwas_trait_name, taIndiv, gwasTrait);
AddAxiom add_gwas_trait_name = new AddAxiom(ontology, gwas_trait_relation);
getManager().applyChange(add_gwas_trait_name);
// assert class membership
OWLClassAssertionAxiom traitClassAssertion = getDataFactory().getOWLClassAssertionAxiom(traitClass, traitIndiv);
getManager().addAxiom(ontology, traitClassAssertion);
// get object properties
OWLObjectProperty has_object = getDataFactory().getOWLObjectProperty(IRI.create(OntologyConstants.HAS_OBJECT_IRI));
OWLObjectProperty is_object_of = getDataFactory().getOWLObjectProperty(IRI.create(OntologyConstants.IS_OBJECT_OF_IRI));
// assert relations in both directions between the association and the trait
OWLObjectPropertyAssertionAxiom has_object_trait_relation = getDataFactory().getOWLObjectPropertyAssertionAxiom(has_object, taIndiv, traitIndiv);
AddAxiom add_has_object_trait = new AddAxiom(ontology, has_object_trait_relation);
getManager().applyChange(add_has_object_trait);
OWLObjectPropertyAssertionAxiom is_object_of_trait_relation = getDataFactory().getOWLObjectPropertyAssertionAxiom(is_object_of, traitIndiv, taIndiv);
AddAxiom add_is_object_of_trait = new AddAxiom(ontology, is_object_of_trait_relation);
getManager().applyChange(add_is_object_of_trait);
}
// finally, assert label for this association (built from the last rsId seen and the study's disease trait)
OWLLiteral label = getDataFactory().getOWLLiteral("Association between " + rsId + " and " + association.getStudy().getDiseaseTrait().getTrait());
OWLAnnotationAssertionAxiom label_annotation = getDataFactory().getOWLAnnotationAssertionAxiom(rdfsLabel, taIndiv.getIRI(), label);
AddAxiom add_band_label = new AddAxiom(ontology, label_annotation);
getManager().applyChange(add_band_label);
}
}
use of uk.ac.ebi.spot.goci.model.Locus in project goci by EBISPOT.
the class DefaultGWASOWLPublisher method validateGWASData.
/**
 * Validates the data obtained from the GWAS catalog (prior to converting to OWL).
 * <p>
 * Every study is inspected: its associations are logged at debug level together with the
 * comma separated list of their EFO traits, and a summary report is logged at info level.
 * Nothing is removed from or changed in {@code studies}.
 *
 * @param studies the set of studies to validate
 */
protected void validateGWASData(Collection<Study> studies) {
    int count = 0;
    int noAssocCount = 0;
    // Never incremented in this method; kept because the report below prints it
    int termMismatches = 0;

    for (Study study : studies) {
        Collection<Association> associations = study.getAssociations();
        getLog().debug("Study (PubMed ID '" + study.getPubmedId() + "') had " + associations.size() + " associations");

        if (associations.isEmpty()) {
            noAssocCount++;
            continue;
        }

        for (Association association : associations) {
            // Build "trait1, trait2, ..." for the log line. String.concat returns a new string
            // and leaves its receiver untouched, so the list has to be accumulated explicitly.
            StringBuilder efoTraitsDashSepList = new StringBuilder();
            for (EfoTrait efoTrait : association.getEfoTraits()) {
                if (efoTraitsDashSepList.length() > 0) {
                    efoTraitsDashSepList.append(", ");
                }
                efoTraitsDashSepList.append(efoTrait.getTrait());
            }

            for (Locus locus : association.getLoci()) {
                for (RiskAllele riskAllele : locus.getStrongestRiskAlleles()) {
                    getLog().debug(" Association: SNP '" + riskAllele.getSnp().getRsId() + "' <-> Trait '" + efoTraitsDashSepList.toString() + "'");
                }
            }
        }
        count++;
    }

    int eligCount = studies.size() - noAssocCount;
    int correctCount = count + termMismatches;
    getLog().info("\n\nREPORT:\n" + eligCount + "/" + studies.size() + " declared associations and therefore could usefully be mapped.\n" + (eligCount - count - termMismatches) + "/" + eligCount + " failed due to data integrity concerns.\n" + count + "/" + correctCount + " studies could be completely mapped after passing all checks.\n" + termMismatches + "/" + correctCount + " failed due to missing or duplicated terms in EFO");
}
use of uk.ac.ebi.spot.goci.model.Locus in project goci by EBISPOT.
the class DefaultGWASOWLPublisher method filterAndPublishGWASData.
/**
 * Filters the given studies and their associations using the configured study limit, date
 * filter and p-value filter, then hands the surviving SNPs, associations and studies to the
 * converter so they are added to the ontology.
 * <p>
 * A study qualifies when no date filter is set or its publication date lies before the filter
 * date. An association of a qualifying study qualifies when no p-value filter is set or its
 * p-value is strictly smaller than the filter value. When a p-value filter is set,
 * associations without a complete p-value are skipped because they cannot be compared.
 *
 * @param conversion the ontology that receives the converted data
 * @param studies    the candidate studies
 * @return {@code conversion}, after the filtered data has been added
 * @throws OWLConversionException declared by the signature for conversion failures
 */
private OWLOntology filterAndPublishGWASData(OWLOntology conversion, Collection<Study> studies) throws OWLConversionException {
    //TODO : check with tony : Discard studies which are not yet associated with a trait.
    // Studies without a disease trait, without a catalog publish date, or with a catalog
    // unpublish date were previously meant to be discarded here; that filtering is disabled.

    Collection<Study> filteredStudies = new ArrayList<>();
    Collection<Association> filteredTraitAssociations = new ArrayList<>();
    Collection<SingleNucleotidePolymorphism> filteredSNPs = new ArrayList<>();

    int count = 0;
    int studyLimit = getStudiesLimit() == -1 ? Integer.MAX_VALUE : getStudiesLimit();
    Iterator<Study> studyIterator = studies.iterator();
    while (count < studyLimit && studyIterator.hasNext()) {
        Study nextStudy = studyIterator.next();

        //only process a study if no date filter has been provided or if the study's publication date is smaller than the filter date
        if (FilterProperties.getDateFilter() == null || nextStudy.getPublicationDate().before(FilterProperties.getDateFilter())) {
            getLog().debug("Qualifying study");

            for (Association nextTA : nextStudy.getAssociations()) {
                if (FilterProperties.getPvalueFilter() != null) {
                    // An association without a complete p-value cannot be compared to the filter
                    if (nextTA.getPvalueMantissa() == null || nextTA.getPvalueExponent() == null) {
                        getLog().debug("Skipping association without a p-value");
                        continue;
                    }

                    // Compare as double: GWAS p-values are routinely smaller than the smallest
                    // positive float (about 1.4e-45) and would underflow to 0 as float, making
                    // the strict "less than" comparison fail for the most significant hits.
                    double filter = FilterProperties.getPvalueMant() * Math.pow(10, FilterProperties.getPvalueExp());
                    double pval = nextTA.getPvalueMantissa() * Math.pow(10, nextTA.getPvalueExponent());
                    getLog().debug("Your comparators are " + filter + " and " + pval);

                    if (!(pval < filter)) {
                        continue;
                    }
                }

                getLog().debug("Qualifying association");
                filteredTraitAssociations.add(nextTA);
                for (Locus locus : nextTA.getLoci()) {
                    for (RiskAllele riskAllele : locus.getStrongestRiskAlleles()) {
                        filteredSNPs.add(riskAllele.getSnp());
                    }
                }
            }

            filteredStudies.add(nextStudy);
            count++;
        }
    }

    // convert this data, starting with SNPs (no dependencies) and working up to studies
    getLog().debug("Starting conversion to OWL...");
    getLog().debug("Converting " + filteredSNPs.size() + " filtered SNPs...");
    getConverter().addSNPsToOntology(filteredSNPs, conversion);
    getLog().debug("Converting " + filteredTraitAssociations.size() + " filtered Trait Associations...");
    getConverter().addAssociationsToOntology(filteredTraitAssociations, conversion);
    getLog().debug("Converting " + filteredStudies.size() + " filtered Studies...");
    getConverter().addStudiesToOntology(filteredStudies, conversion);
    getLog().debug("All conversion done!");
    return conversion;
}
Aggregations