use of uk.ac.ebi.spot.goci.model.Gene in project goci by EBISPOT.
the class AssociationRowProcessor method createLoci.
/**
 * Builds the loci for an uploaded association row.
 * <p>
 * For SNP interaction rows one locus is created per risk allele; risk alleles and author reported genes are
 * both split on {@code "x"} and paired up by position. For all other rows (multi-SNP haplotype and standard
 * SNP) a single locus is created from risk alleles separated by {@code ";"}.
 *
 * @param row               the uploaded row to read alleles, SNPs, genes and frequencies from
 * @param snpInteraction    whether the row describes a SNP x SNP interaction
 * @param multiSnpHaplotype whether the row describes a multi-SNP haplotype; when false, every risk allele
 *                          receives the row's overall association risk frequency
 * @return the loci created for the row
 */
private Collection<Locus> createLoci(AssociationUploadRow row, Boolean snpInteraction, Boolean multiSnpHaplotype) {
    Collection<Locus> loci = new ArrayList<>();
    if (snpInteraction) {
        String delimiter = "x";
        // For SNP interaction studies we need to create a locus per risk allele
        // Handle curator entered risk allele
        Collection<RiskAllele> locusRiskAlleles = createLocusRiskAlleles(row.getStrongestAllele(), row.getSnp(), row.getProxy(), row.getRiskFrequency(), row.getSnpStatus(), delimiter);
        // Genes for each interaction should be separated by 'x'. The gene column may be
        // absent, so fall back to "no genes" instead of failing on a null value.
        String authorReportedGene = row.getAuthorReportedGene();
        String[] separatedGenes = (authorReportedGene == null) ? new String[0] : authorReportedGene.split(delimiter);
        int geneIndex = 0;
        for (RiskAllele riskAllele : locusRiskAlleles) {
            Locus locus = new Locus();
            // Set risk alleles, assume one locus per risk allele
            Collection<RiskAllele> currentLocusRiskAlleles = new ArrayList<>();
            currentLocusRiskAlleles.add(riskAllele);
            locus.setStrongestRiskAlleles(currentLocusRiskAlleles);
            // Set gene. There can be fewer gene segments than risk alleles (String.split drops
            // trailing empty segments), in which case the locus is left without author reported
            // genes, mirroring the non-interaction branch below.
            if (geneIndex < separatedGenes.length) {
                Collection<Gene> locusGenes = associationAttributeService.createLocusGenes(separatedGenes[geneIndex], ",");
                locus.setAuthorReportedGenes(locusGenes);
            }
            geneIndex++;
            // Set description
            locus.setDescription("SNP x SNP interaction");
            loci.add(locus);
        }
    } else {
        // Handle multi-snp and standard snp
        String delimiter = ";";
        // For multi-snp and standard snps we assume there is only one locus
        Locus locus = new Locus();
        // Handle curator entered genes, for haplotype they are separated by a comma
        String authorReportedGene = row.getAuthorReportedGene();
        if (authorReportedGene != null && !authorReportedGene.isEmpty()) {
            Collection<Gene> locusGenes = associationAttributeService.createLocusGenes(authorReportedGene, ",");
            locus.setAuthorReportedGenes(locusGenes);
        }
        // Handle curator entered risk allele
        Collection<RiskAllele> locusRiskAlleles = createLocusRiskAlleles(row.getStrongestAllele(), row.getSnp(), row.getProxy(), row.getRiskFrequency(), row.getSnpStatus(), delimiter);
        if (!multiSnpHaplotype) {
            // For standard associations set the risk allele frequency to the
            // same value as the overall association frequency
            Collection<RiskAllele> locusRiskAllelesWithRiskFrequencyValues = new ArrayList<>();
            for (RiskAllele riskAllele : locusRiskAlleles) {
                riskAllele.setRiskFrequency(row.getAssociationRiskFrequency());
                locusRiskAllelesWithRiskFrequencyValues.add(riskAllele);
            }
            locus.setStrongestRiskAlleles(locusRiskAllelesWithRiskFrequencyValues);
        } else {
            locus.setStrongestRiskAlleles(locusRiskAlleles);
        }
        // Set locus attributes: more than one risk allele means the locus is a haplotype
        int haplotypeCount = locusRiskAlleles.size();
        if (haplotypeCount > 1) {
            locus.setHaplotypeSnpCount(haplotypeCount);
            locus.setDescription(haplotypeCount + "-SNP haplotype");
        } else {
            locus.setDescription("Single variant");
        }
        loci.add(locus);
    }
    return loci;
}
use of uk.ac.ebi.spot.goci.model.Gene in project goci by EBISPOT.
the class EnsemblMappingPipeline method addGenomicContext.
/**
 * Create GenomicContext objects from the JSONObjects and add them to the current Ensembl mapping result.
 * <p>
 * For upstream/downstream (i.e. non "overlap") types a first pass over the gene list determines the closest
 * gene; a second pass then creates one GenomicContext per gene, flagging the closest one. Genes already
 * recorded as overlapping the variant for the given source are skipped for upstream/downstream types.
 *
 * @param json_gene_list the list of genes in JSONObject format (each with "id", "external_name" and, for
 *                       upstream/downstream types, "start"/"end")
 * @param snp_location   an instance of the Location class (chromosome name and position)
 * @param source         the source of the data (Ensembl or NCBI)
 * @param type           the type of genomic context (i.e. overlap, upstream, downstream)
 * @return boolean to indicate whether a closest gene has been found or not (only relevant for upstream and
 * downstream gene)
 */
private boolean addGenomicContext(JSONArray json_gene_list, Location snp_location, String source, String type) {
    String closest_gene = "";
    int closest_distance = 0;
    boolean intergenic = !type.equals("overlap");
    boolean upstream = type.equals("upstream");
    boolean downstream = type.equals("downstream");
    Integer position = snp_location.getChromosomePosition();
    SingleNucleotidePolymorphism snp_tmp = new SingleNucleotidePolymorphism();
    snp_tmp.setRsId(getEnsemblMappingResult().getRsId());
    // Get closest gene
    if (intergenic) {
        int pos = position;
        for (int i = 0; i < json_gene_list.length(); ++i) {
            JSONObject json_gene = json_gene_list.getJSONObject(i);
            String gene_id = json_gene.getString("id");
            String gene_name = json_gene.getString("external_name");
            if (isAlreadyOverlappingGene(source, gene_name)) {
                // Skip overlapping genes which also overlap upstream and/or downstream of the variant
                continue;
            }
            int distance = distanceToIntergenicGene(json_gene, pos, type);
            // NOTE(review): the first non-skipped gene is always accepted (closest_distance == 0). If its
            // distance is negative no later gene can replace it, because the first clause requires a
            // positive distance smaller than closest_distance. Left as is; confirm this is intended.
            if ((distance < closest_distance && distance > 0) || closest_distance == 0) {
                closest_gene = gene_id;
                closest_distance = distance;
            }
        }
    }
    for (int i = 0; i < json_gene_list.length(); ++i) {
        JSONObject json_gene = json_gene_list.getJSONObject(i);
        String gene_id = json_gene.getString("id");
        String gene_name = json_gene.getString("external_name");
        // NOTE(review): these literals are compared directly while the skip test uses getNcbiSource();
        // presumably both refer to the same source labels. Confirm.
        String ncbi_id = (source.equals("NCBI")) ? gene_id : null;
        String ensembl_id = (source.equals("Ensembl")) ? gene_id : null;
        int distance = 0;
        if (intergenic) {
            if (isAlreadyOverlappingGene(source, gene_name)) {
                // Skip overlapping genes which also overlap upstream and/or downstream of the variant
                continue;
            }
            int pos = position;
            distance = distanceToIntergenicGene(json_gene, pos, type);
        }
        Long dist = (long) distance;
        EntrezGene entrezGene = new EntrezGene();
        entrezGene.setEntrezGeneId(ncbi_id);
        Collection<EntrezGene> entrezGenes = new ArrayList<>();
        entrezGenes.add(entrezGene);
        EnsemblGene ensemblGene = new EnsemblGene();
        ensemblGene.setEnsemblGeneId(ensembl_id);
        Collection<EnsemblGene> ensemblGenes = new ArrayList<>();
        ensemblGenes.add(ensemblGene);
        Gene gene_object = new Gene(gene_name, entrezGenes, ensemblGenes);
        // Check if the gene corresponds to the closest gene. Strings are compared by content:
        // an id parsed from JSON is never guaranteed to be the same instance as the "" literal.
        boolean is_closest_gene = !closest_gene.isEmpty() && closest_gene.equals(gene_id);
        GenomicContext gc = new GenomicContext(intergenic, upstream, downstream, dist, snp_tmp, gene_object, snp_location, source, getMappingMethod(), is_closest_gene);
        getEnsemblMappingResult().addGenomicContext(gc);
    }
    return !closest_gene.isEmpty();
}

/**
 * Tells whether a gene must be skipped for upstream/downstream contexts: either it has no name, or the
 * mapping result already lists it as overlapping the variant for the given source.
 */
private boolean isAlreadyOverlappingGene(String source, String gene_name) {
    boolean ncbi_source = source.equals(getNcbiSource());
    if (gene_name == null) {
        return true;
    }
    if (ncbi_source) {
        return getEnsemblMappingResult().getNcbiOverlappingGene().contains(gene_name);
    }
    return getEnsemblMappingResult().getEnsemblOverlappingGene().contains(gene_name);
}

/**
 * Computes the distance between the variant position and a gene: position minus gene "end" for upstream,
 * gene "start" minus position for downstream, 0 for any other type.
 */
private int distanceToIntergenicGene(JSONObject json_gene, int pos, String type) {
    if (type.equals("upstream")) {
        return pos - json_gene.getInt("end");
    }
    if (type.equals("downstream")) {
        return json_gene.getInt("start") - pos;
    }
    return 0;
}
use of uk.ac.ebi.spot.goci.model.Gene in project goci by EBISPOT.
the class AssociationRowProcessorTest method testCreateAssociationFromUploadRowSnpInteraction.
/**
 * Checks that a SNP interaction upload row is turned into an association with one locus per risk allele,
 * each carrying its own author reported gene and the "SNP x SNP interaction" description.
 */
@Test
public void testCreateAssociationFromUploadRowSnpInteraction() throws Exception {
    // Given: the attribute service resolves the genes, SNPs and risk alleles found in the row
    when(associationAttributeService.createLocusGenes("PMS1 ", ",")).thenReturn(Arrays.asList(GENE_03));
    when(associationAttributeService.createLocusGenes(" HIBCH", ",")).thenReturn(Arrays.asList(GENE_04));
    when(associationAttributeService.createSnp("rs2562796")).thenReturn(SNP_02);
    when(associationAttributeService.createRiskAllele("rs2562796-T", SNP_02)).thenReturn(RA_02);
    when(associationAttributeService.createSnp("rs16832404")).thenReturn(SNP_03);
    when(associationAttributeService.createRiskAllele("rs16832404-G", SNP_03)).thenReturn(RA_03);

    // When
    Association association = associationRowProcessor.createAssociationFromUploadRow(SNP_INTERACTION_ROW);

    // Then: no calculation service involvement, no EFO trait lookup
    verify(associationCalculationService, never()).reverseCI(SNP_INTERACTION_ROW.getRange());
    verify(associationCalculationService, never()).setRange(SNP_INTERACTION_ROW.getStandardError(), SNP_INTERACTION_ROW.getOrPerCopyNum());
    verify(associationAttributeService, never()).getEfoTraitsFromRepository(Collections.EMPTY_LIST);

    // ... and each gene, SNP and risk allele is created exactly once
    verify(associationAttributeService, times(1)).createLocusGenes("PMS1 ", ",");
    verify(associationAttributeService, times(1)).createLocusGenes(" HIBCH", ",");
    verify(associationAttributeService, times(1)).createSnp("rs2562796");
    verify(associationAttributeService, times(1)).createSnp("rs16832404");
    verify(associationAttributeService, times(1)).createRiskAllele("rs2562796-T", SNP_02);
    verify(associationAttributeService, times(1)).createRiskAllele("rs16832404-G", SNP_03);

    // Association level attributes
    assertThat(association)
            .extracting("id", "riskFrequency", "pvalueDescription", "pvalueMantissa", "pvalueExponent",
                        "multiSnpHaplotype", "snpInteraction", "snpApproved", "snpType", "standardError",
                        "range", "description", "orPerCopyNum", "orPerCopyRecip", "orPerCopyRecipRange",
                        "betaNum", "betaUnit", "betaDirection", "study", "associationReport",
                        "lastMappingDate", "lastMappingPerformedBy", "lastUpdateDate")
            .containsExactly(null, "0.52", null, 2, -7,
                             false, true, false, null, (float) 0.6,
                             "[0.82-0.92]", null, (float) 1.22, null, null,
                             null, null, null, null, null,
                             null, null, null);
    assertThat(association.getEfoTraits()).isEmpty();
    assertThat(association.getEvents()).isEmpty();
    assertThat(association.getStudy()).isNull();
    assertThat(association.getLoci()).hasSize(2);

    // Flatten genes and risk alleles across all loci, preserving locus order
    Collection<Gene> locusGenes = new ArrayList<>();
    Collection<RiskAllele> locusRiskAlleles = new ArrayList<>();
    for (Locus locus : association.getLoci()) {
        locusGenes.addAll(locus.getAuthorReportedGenes());
    }
    for (Locus locus : association.getLoci()) {
        locusRiskAlleles.addAll(locus.getStrongestRiskAlleles());
    }

    // Locus level attributes
    assertThat(association.getLoci()).extracting(Locus::getDescription).containsOnly("SNP x SNP interaction");
    assertThat(locusGenes).hasSize(2).contains(GENE_03, GENE_04);
    assertThat(locusRiskAlleles).hasSize(2).contains(RA_02, RA_03);
    assertThat(locusRiskAlleles)
            .extracting("riskAlleleName", "riskFrequency", "snp.rsId")
            .contains(tuple("rs2562796-T", "0.3", "rs2562796"), tuple("rs16832404-G", "0.4", "rs16832404"));
    assertThat(locusRiskAlleles).extracting(RiskAllele::getSnp).containsExactly(SNP_02, SNP_03);
}
use of uk.ac.ebi.spot.goci.model.Gene in project goci by EBISPOT.
the class AssociationRowProcessorTest method testCreateAssociationFromUploadRowHaplotype.
/**
 * Checks that a haplotype upload row is turned into an association with a single "2-SNP haplotype" locus
 * holding both risk alleles, no author reported genes and no per-allele risk frequency.
 */
@Test
public void testCreateAssociationFromUploadRowHaplotype() throws Exception {
    // Given: the services resolve the SNPs and risk alleles of the row and can reverse its CI
    when(associationAttributeService.createSnp("rs456")).thenReturn(SNP_04);
    when(associationAttributeService.createRiskAllele("rs456-T", SNP_04)).thenReturn(RA_04);
    when(associationAttributeService.createSnp("rs678")).thenReturn(SNP_05);
    when(associationAttributeService.createRiskAllele("rs678-?", SNP_05)).thenReturn(RA_05);
    when(associationCalculationService.reverseCI("[0.87-0.94]")).thenReturn("[1.06-1.15]");

    // When
    Association association = associationRowProcessor.createAssociationFromUploadRow(HAPLOTYPE_ROW);

    // Then: the CI is reversed once, the range is never recalculated
    verify(associationCalculationService, times(1)).reverseCI(Matchers.anyString());
    verify(associationCalculationService, never()).setRange(Matchers.anyDouble(), Matchers.anyDouble());

    // ... no EFO trait or gene lookups, and each SNP / risk allele is created exactly once
    verify(associationAttributeService, never()).getEfoTraitsFromRepository(Collections.EMPTY_LIST);
    verify(associationAttributeService, never()).createLocusGenes(Matchers.anyString(), Matchers.anyString());
    verify(associationAttributeService, times(1)).createSnp("rs456");
    verify(associationAttributeService, times(1)).createSnp("rs678");
    verify(associationAttributeService, times(1)).createRiskAllele("rs456-T", SNP_04);
    verify(associationAttributeService, times(1)).createRiskAllele("rs678-?", SNP_05);

    // Association level attributes
    assertThat(association)
            .extracting("id", "riskFrequency", "pvalueDescription", "pvalueMantissa", "pvalueExponent",
                        "multiSnpHaplotype", "snpInteraction", "snpApproved", "snpType", "standardError",
                        "range", "description", "orPerCopyNum", "orPerCopyRecip", "orPerCopyRecipRange",
                        "betaNum", "betaUnit", "betaDirection", "study", "associationReport",
                        "lastMappingDate", "lastMappingPerformedBy", "lastUpdateDate")
            .containsExactly(null, null, "(description)", 2, -7,
                             true, false, false, null, null,
                             "[1.06-1.15]", null, 1.2048193f, 0.83f, "[0.87-0.94]",
                             null, null, null, null, null,
                             null, null, null);
    assertThat(association.getEfoTraits()).isEmpty();
    assertThat(association.getEvents()).isEmpty();
    assertThat(association.getStudy()).isNull();
    assertThat(association.getLoci()).hasSize(1);

    // Flatten risk alleles and genes across all loci, preserving locus order
    Collection<RiskAllele> locusRiskAlleles = new ArrayList<>();
    for (Locus locus : association.getLoci()) {
        locusRiskAlleles.addAll(locus.getStrongestRiskAlleles());
    }
    Collection<Gene> locusGenes = new ArrayList<>();
    for (Locus locus : association.getLoci()) {
        locusGenes.addAll(locus.getAuthorReportedGenes());
    }

    // Locus level attributes
    assertThat(association.getLoci()).extracting(Locus::getDescription).containsOnly("2-SNP haplotype");
    assertThat(locusGenes).isEmpty();
    assertThat(locusRiskAlleles).hasSize(2).contains(RA_04, RA_05);
    assertThat(locusRiskAlleles).extracting(RiskAllele::getRiskFrequency).containsNull();
    assertThat(locusRiskAlleles)
            .extracting("riskAlleleName", "riskFrequency", "snp.rsId")
            .contains(tuple("rs456-T", null, "rs456"), tuple("rs678-?", null, "rs678"));
    assertThat(locusRiskAlleles).extracting(RiskAllele::getSnp).containsExactly(SNP_04, SNP_05);
}
use of uk.ac.ebi.spot.goci.model.Gene in project goci by EBISPOT.
the class SnpGenomicContextMappingService method createOrRetrieveEnsemblExternalId.
/**
 * Looks up the Ensembl gene entry for the given ID, creating and saving a new one when none exists.
 * <p>
 * When an existing entry is linked to a gene whose name differs from {@code geneName}, a warning is logged
 * and the entry is removed from that gene's Ensembl IDs, and the gene is saved.
 *
 * @param id       Ensembl gene ID
 * @param geneName Gene name allows method to check if this id is actually already linked to another gene
 * @return the existing or newly created Ensembl gene entry
 */
private EnsemblGene createOrRetrieveEnsemblExternalId(String id, String geneName) {
    EnsemblGene storedEnsemblGene = ensemblGeneQueryService.findByEnsemblGeneId(id);

    // Unknown ID: create new entry in ENSEMBL_GENE table
    if (storedEnsemblGene == null) {
        EnsemblGene newEnsemblGene = new EnsemblGene();
        newEnsemblGene.setEnsemblGeneId(id);
        ensemblGeneRepository.save(newEnsemblGene);
        return newEnsemblGene;
    }

    // Known ID: check it is not linked to a gene with a different name
    Gene linkedGene = storedEnsemblGene.getGene();
    boolean linkedToDifferentGene = linkedGene != null && !Objects.equals(linkedGene.getGeneName(), geneName);
    if (linkedToDifferentGene) {
        getLog().warn("Ensembl ID: " + id + ", is already used in database by a different gene(s): " + linkedGene.getGeneName() + ". Will update so links to " + geneName);
        // Detach the Ensembl ID from the gene currently holding it
        linkedGene.getEnsemblGeneIds().remove(storedEnsemblGene);
        geneRepository.save(linkedGene);
    }
    return storedEnsemblGene;
}
Aggregations