Search in sources :

Example 1 with Gene

use of uk.ac.ebi.spot.goci.model.Gene in project goci by EBISPOT.

the class AssociationRowProcessor method createLoci.

/**
 * Builds the loci for one uploaded association row.
 * <p>
 * For SNP interaction rows one locus is created per risk allele; risk alleles and author reported
 * genes are both split on "x" and paired up by position. For all other rows (standard and
 * multi-SNP haplotype) a single locus holding every risk allele is created.
 *
 * @param row               the upload row to read alleles, SNPs and genes from
 * @param snpInteraction    whether the row describes a SNP x SNP interaction; {@code null} is
 *                          treated as {@code false}
 * @param multiSnpHaplotype whether the row describes a multi-SNP haplotype; {@code null} is
 *                          treated as {@code false}
 * @return the loci created for the row
 */
private Collection<Locus> createLoci(AssociationUploadRow row, Boolean snpInteraction, Boolean multiSnpHaplotype) {
    Collection<Locus> loci = new ArrayList<>();
    // Boolean.TRUE.equals(...) avoids a NullPointerException from auto-unboxing a null flag
    if (Boolean.TRUE.equals(snpInteraction)) {
        String delimiter = "x";
        // For SNP interaction studies we need to create a locus per risk allele
        // Handle curator entered risk allele
        Collection<RiskAllele> locusRiskAlleles = createLocusRiskAlleles(row.getStrongestAllele(), row.getSnp(), row.getProxy(), row.getRiskFrequency(), row.getSnpStatus(), delimiter);
        // Genes for each interaction should be separated by 'x'. The gene column may be
        // missing or empty, in which case no genes are attached to any locus (this mirrors
        // the guard used for standard/haplotype rows below).
        String authorReportedGene = row.getAuthorReportedGene();
        String[] separatedGenes = (authorReportedGene == null || authorReportedGene.isEmpty())
                ? new String[0]
                : authorReportedGene.split(delimiter);
        int geneIndex = 0;
        for (RiskAllele riskAllele : locusRiskAlleles) {
            Locus locus = new Locus();
            // Set risk alleles, assume one locus per risk allele
            Collection<RiskAllele> currentLocusRiskAlleles = new ArrayList<>();
            currentLocusRiskAlleles.add(riskAllele);
            locus.setStrongestRiskAlleles(currentLocusRiskAlleles);
            // Set gene. There may be fewer gene groups than risk alleles; loci without a
            // matching gene group are created without author reported genes instead of
            // failing with an ArrayIndexOutOfBoundsException.
            // The gene group is passed on exactly as split (not trimmed).
            if (geneIndex < separatedGenes.length) {
                Collection<Gene> locusGenes = associationAttributeService.createLocusGenes(separatedGenes[geneIndex], ",");
                locus.setAuthorReportedGenes(locusGenes);
            }
            geneIndex++;
            // Set description
            locus.setDescription("SNP x SNP interaction");
            loci.add(locus);
        }
    } else {
        // Handle multi-snp and standard snp
        String delimiter = ";";
        // For multi-snp and standard snps we assume there is only one locus
        Locus locus = new Locus();
        // Handle curator entered genes, for haplotype they are separated by a comma
        String authorReportedGene = row.getAuthorReportedGene();
        if (authorReportedGene != null && !authorReportedGene.isEmpty()) {
            Collection<Gene> locusGenes = associationAttributeService.createLocusGenes(authorReportedGene, ",");
            locus.setAuthorReportedGenes(locusGenes);
        }
        // Handle curator entered risk allele
        Collection<RiskAllele> locusRiskAlleles = createLocusRiskAlleles(row.getStrongestAllele(), row.getSnp(), row.getProxy(), row.getRiskFrequency(), row.getSnpStatus(), delimiter);
        if (Boolean.TRUE.equals(multiSnpHaplotype)) {
            locus.setStrongestRiskAlleles(locusRiskAlleles);
        } else {
            // For standard associations set the risk allele frequency to the
            // same value as the overall association frequency
            for (RiskAllele riskAllele : locusRiskAlleles) {
                riskAllele.setRiskFrequency(row.getAssociationRiskFrequency());
            }
            // The locus receives its own list, as before, rather than the collection
            // returned by createLocusRiskAlleles
            locus.setStrongestRiskAlleles(new ArrayList<>(locusRiskAlleles));
        }
        // Set locus attributes
        int haplotypeCount = locusRiskAlleles.size();
        if (haplotypeCount > 1) {
            locus.setHaplotypeSnpCount(haplotypeCount);
            locus.setDescription(haplotypeCount + "-SNP haplotype");
        } else {
            locus.setDescription("Single variant");
        }
        loci.add(locus);
    }
    return loci;
}
Also used : Gene(uk.ac.ebi.spot.goci.model.Gene) RiskAllele(uk.ac.ebi.spot.goci.model.RiskAllele) ArrayList(java.util.ArrayList) Locus(uk.ac.ebi.spot.goci.model.Locus)

Example 2 with Gene

use of uk.ac.ebi.spot.goci.model.Gene in project goci by EBISPOT.

the class EnsemblMappingPipeline method addGenomicContext.

/**
     * Create GenomicContext objects from the JSON gene entries and add them to the current Ensembl mapping result
     * (via {@code getEnsemblMappingResult().addGenomicContext(...)}).
     *
     * @param json_gene_list the list of genes (overlapping, upstream or downstream of the variant) in JSONObject format
     * @param snp_location   an instance of the Location class (chromosome name and position)
     * @param source         the source of the data (Ensembl or NCBI)
     * @param type           the type of genomic context (i.e. overlap, upstream, downstream)
     * @return boolean to indicate whether a closest gene has been found or not (only relevant for upstream and
     * downstream genes)
     */
private boolean addGenomicContext(JSONArray json_gene_list, Location snp_location, String source, String type) {
    // null means "no closest gene selected". A null sentinel replaces the previous
    // reference comparison against "" (closest_gene != ""), which relied on string interning.
    String closestGeneId = null;
    int closestDistance = 0;
    boolean intergenic = !type.equals("overlap");
    boolean upstream = type.equals("upstream");
    boolean downstream = type.equals("downstream");
    Integer position = snp_location.getChromosomePosition();
    SingleNucleotidePolymorphism snp_tmp = new SingleNucleotidePolymorphism();
    snp_tmp.setRsId(getEnsemblMappingResult().getRsId());
    // Only unboxed for intergenic contexts, where it is needed for the distance computation
    int pos = 0;
    // First pass: get closest gene
    if (intergenic) {
        pos = position;
        for (int i = 0; i < json_gene_list.length(); ++i) {
            JSONObject json_gene = json_gene_list.getJSONObject(i);
            String gene_id = json_gene.getString("id");
            String gene_name = json_gene.getString("external_name");
            if (isUnnamedOrOverlappingGene(source, gene_name)) {
                // Skip overlapping genes which also overlap upstream and/or downstream of the variant
                continue;
            }
            int distance = intergenicDistance(json_gene, type, pos);
            // NOTE(review): while closestDistance is still 0 any candidate is accepted, including one
            // with a distance <= 0, and such a candidate is never replaced by a later positive
            // distance. Kept unchanged; confirm this is intended.
            if ((distance < closestDistance && distance > 0) || closestDistance == 0) {
                closestGeneId = gene_id;
                closestDistance = distance;
            }
        }
    }
    // Second pass: create one GenomicContext per retained gene
    for (int i = 0; i < json_gene_list.length(); ++i) {
        JSONObject json_gene = json_gene_list.getJSONObject(i);
        String gene_id = json_gene.getString("id");
        String gene_name = json_gene.getString("external_name");
        // NOTE(review): the source is compared to the literals "NCBI"/"Ensembl" here but to
        // getNcbiSource() in the skip check; presumably these agree - verify.
        String ncbi_id = source.equals("NCBI") ? gene_id : null;
        String ensembl_id = source.equals("Ensembl") ? gene_id : null;
        int distance = 0;
        if (intergenic) {
            if (isUnnamedOrOverlappingGene(source, gene_name)) {
                // Skip overlapping genes which also overlap upstream and/or downstream of the variant
                continue;
            }
            distance = intergenicDistance(json_gene, type, pos);
        }
        Long dist = (long) distance;
        EntrezGene entrezGene = new EntrezGene();
        entrezGene.setEntrezGeneId(ncbi_id);
        Collection<EntrezGene> entrezGenes = new ArrayList<>();
        entrezGenes.add(entrezGene);
        EnsemblGene ensemblGene = new EnsemblGene();
        ensemblGene.setEnsemblGeneId(ensembl_id);
        Collection<EnsemblGene> ensemblGenes = new ArrayList<>();
        ensemblGenes.add(ensemblGene);
        Gene gene_object = new Gene(gene_name, entrezGenes, ensemblGenes);
        // Check if the gene corresponds to the closest gene (never true when none was selected)
        boolean is_closest_gene = gene_id.equals(closestGeneId);
        GenomicContext gc = new GenomicContext(intergenic, upstream, downstream, dist, snp_tmp, gene_object, snp_location, source, getMappingMethod(), is_closest_gene);
        getEnsemblMappingResult().addGenomicContext(gc);
    }
    return closestGeneId != null;
}

/**
 * Tells whether a gene must be left out of the upstream/downstream contexts: either it has no name,
 * or its name is contained in the overlapping genes already recorded for the given source.
 *
 * @param source   the source of the data; compared against getNcbiSource() to pick the list to check
 * @param geneName the gene name, may be null
 * @return true if the gene should be skipped
 */
private boolean isUnnamedOrOverlappingGene(String source, String geneName) {
    if (geneName == null) {
        return true;
    }
    if (source.equals(getNcbiSource())) {
        return getEnsemblMappingResult().getNcbiOverlappingGene().contains(geneName);
    }
    return getEnsemblMappingResult().getEnsemblOverlappingGene().contains(geneName);
}

/**
 * Computes the distance between the variant and a gene: variant position minus gene "end" for
 * upstream genes, gene "start" minus variant position for downstream genes, 0 for any other type.
 *
 * @param json_gene the gene, providing integer "start" and "end" values
 * @param type      the type of genomic context
 * @param pos       the variant position
 * @return the distance as described above
 */
private int intergenicDistance(JSONObject json_gene, String type, int pos) {
    if (type.equals("upstream")) {
        return pos - json_gene.getInt("end");
    }
    if (type.equals("downstream")) {
        return json_gene.getInt("start") - pos;
    }
    return 0;
}
Also used : EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) ArrayList(java.util.ArrayList) GenomicContext(uk.ac.ebi.spot.goci.model.GenomicContext) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) JSONObject(org.json.JSONObject) EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) Gene(uk.ac.ebi.spot.goci.model.Gene) SingleNucleotidePolymorphism(uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism)

Example 3 with Gene

use of uk.ac.ebi.spot.goci.model.Gene in project goci by EBISPOT.

the class AssociationRowProcessorTest method testCreateAssociationFromUploadRowSnpInteraction.

@Test
public void testCreateAssociationFromUploadRowSnpInteraction() throws Exception {
    // Arrange: stub gene, SNP and risk allele creation for both sides of the interaction
    when(associationAttributeService.createLocusGenes("PMS1 ", ",")).thenReturn(Arrays.asList(GENE_03));
    when(associationAttributeService.createLocusGenes(" HIBCH", ",")).thenReturn(Arrays.asList(GENE_04));
    when(associationAttributeService.createSnp("rs2562796")).thenReturn(SNP_02);
    when(associationAttributeService.createSnp("rs16832404")).thenReturn(SNP_03);
    when(associationAttributeService.createRiskAllele("rs2562796-T", SNP_02)).thenReturn(RA_02);
    when(associationAttributeService.createRiskAllele("rs16832404-G", SNP_03)).thenReturn(RA_03);

    // Act
    Association association = associationRowProcessor.createAssociationFromUploadRow(SNP_INTERACTION_ROW);

    // Assert: interactions with the calculation service
    verify(associationCalculationService, never()).reverseCI(SNP_INTERACTION_ROW.getRange());
    verify(associationCalculationService, never()).setRange(SNP_INTERACTION_ROW.getStandardError(), SNP_INTERACTION_ROW.getOrPerCopyNum());

    // Assert: interactions with the attribute service
    verify(associationAttributeService, never()).getEfoTraitsFromRepository(Collections.EMPTY_LIST);
    verify(associationAttributeService, times(1)).createLocusGenes("PMS1 ", ",");
    verify(associationAttributeService, times(1)).createLocusGenes(" HIBCH", ",");
    verify(associationAttributeService, times(1)).createSnp("rs2562796");
    verify(associationAttributeService, times(1)).createSnp("rs16832404");
    verify(associationAttributeService, times(1)).createRiskAllele("rs2562796-T", SNP_02);
    verify(associationAttributeService, times(1)).createRiskAllele("rs16832404-G", SNP_03);

    // Assert: association attributes
    assertThat(association)
            .extracting("id", "riskFrequency", "pvalueDescription", "pvalueMantissa", "pvalueExponent",
                        "multiSnpHaplotype", "snpInteraction", "snpApproved", "snpType", "standardError",
                        "range", "description", "orPerCopyNum", "orPerCopyRecip", "orPerCopyRecipRange",
                        "betaNum", "betaUnit", "betaDirection", "study", "associationReport",
                        "lastMappingDate", "lastMappingPerformedBy", "lastUpdateDate")
            .containsExactly(null, "0.52", null, 2, -7,
                             false, true, false, null, (float) 0.6,
                             "[0.82-0.92]", null, (float) 1.22, null, null,
                             null, null, null, null, null,
                             null, null, null);
    assertThat(association.getEfoTraits()).isEmpty();
    assertThat(association.getEvents()).isEmpty();
    assertThat(association.getStudy()).isNull();
    assertThat(association.getLoci()).hasSize(2);

    // Gather genes and risk alleles across all loci, in locus order
    Collection<Gene> genesAcrossLoci = new ArrayList<>();
    Collection<RiskAllele> riskAllelesAcrossLoci = new ArrayList<>();
    for (Locus locus : association.getLoci()) {
        genesAcrossLoci.addAll(locus.getAuthorReportedGenes());
        riskAllelesAcrossLoci.addAll(locus.getStrongestRiskAlleles());
    }

    // Assert: locus attributes
    assertThat(association.getLoci()).extracting(Locus::getDescription).containsOnly("SNP x SNP interaction");
    assertThat(genesAcrossLoci).hasSize(2).contains(GENE_03, GENE_04);
    assertThat(riskAllelesAcrossLoci).hasSize(2).contains(RA_02, RA_03);
    assertThat(riskAllelesAcrossLoci)
            .extracting("riskAlleleName", "riskFrequency", "snp.rsId")
            .contains(tuple("rs2562796-T", "0.3", "rs2562796"), tuple("rs16832404-G", "0.4", "rs16832404"));
    assertThat(riskAllelesAcrossLoci).extracting(RiskAllele::getSnp).containsExactly(SNP_02, SNP_03);
}
Also used : Association(uk.ac.ebi.spot.goci.model.Association) Gene(uk.ac.ebi.spot.goci.model.Gene) RiskAllele(uk.ac.ebi.spot.goci.model.RiskAllele) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 4 with Gene

use of uk.ac.ebi.spot.goci.model.Gene in project goci by EBISPOT.

the class AssociationRowProcessorTest method testCreateAssociationFromUploadRowHaplotype.

@Test
public void testCreateAssociationFromUploadRowHaplotype() throws Exception {
    // Arrange: stub SNP and risk allele creation for both haplotype SNPs, plus the reversed CI
    when(associationAttributeService.createSnp("rs456")).thenReturn(SNP_04);
    when(associationAttributeService.createSnp("rs678")).thenReturn(SNP_05);
    when(associationAttributeService.createRiskAllele("rs456-T", SNP_04)).thenReturn(RA_04);
    when(associationAttributeService.createRiskAllele("rs678-?", SNP_05)).thenReturn(RA_05);
    when(associationCalculationService.reverseCI("[0.87-0.94]")).thenReturn("[1.06-1.15]");

    // Act
    Association association = associationRowProcessor.createAssociationFromUploadRow(HAPLOTYPE_ROW);

    // Assert: interactions with the calculation service
    verify(associationCalculationService, times(1)).reverseCI(Matchers.anyString());
    verify(associationCalculationService, never()).setRange(Matchers.anyDouble(), Matchers.anyDouble());

    // Assert: interactions with the attribute service (no gene creation expected for this row)
    verify(associationAttributeService, never()).getEfoTraitsFromRepository(Collections.EMPTY_LIST);
    verify(associationAttributeService, never()).createLocusGenes(Matchers.anyString(), Matchers.anyString());
    verify(associationAttributeService, times(1)).createSnp("rs456");
    verify(associationAttributeService, times(1)).createSnp("rs678");
    verify(associationAttributeService, times(1)).createRiskAllele("rs456-T", SNP_04);
    verify(associationAttributeService, times(1)).createRiskAllele("rs678-?", SNP_05);

    // Assert: association attributes
    assertThat(association)
            .extracting("id", "riskFrequency", "pvalueDescription", "pvalueMantissa", "pvalueExponent",
                        "multiSnpHaplotype", "snpInteraction", "snpApproved", "snpType", "standardError",
                        "range", "description", "orPerCopyNum", "orPerCopyRecip", "orPerCopyRecipRange",
                        "betaNum", "betaUnit", "betaDirection", "study", "associationReport",
                        "lastMappingDate", "lastMappingPerformedBy", "lastUpdateDate")
            .containsExactly(null, null, "(description)", 2, -7,
                             true, false, false, null, null,
                             "[1.06-1.15]", null, 1.2048193f, 0.83f, "[0.87-0.94]",
                             null, null, null, null, null,
                             null, null, null);
    assertThat(association.getEfoTraits()).isEmpty();
    assertThat(association.getEvents()).isEmpty();
    assertThat(association.getStudy()).isNull();
    assertThat(association.getLoci()).hasSize(1);

    // Gather risk alleles and genes across all loci, in locus order
    Collection<RiskAllele> riskAllelesAcrossLoci = new ArrayList<>();
    Collection<Gene> genesAcrossLoci = new ArrayList<>();
    for (Locus locus : association.getLoci()) {
        riskAllelesAcrossLoci.addAll(locus.getStrongestRiskAlleles());
        genesAcrossLoci.addAll(locus.getAuthorReportedGenes());
    }

    // Assert: locus attributes
    assertThat(association.getLoci()).extracting(Locus::getDescription).containsOnly("2-SNP haplotype");
    assertThat(genesAcrossLoci).isEmpty();
    assertThat(riskAllelesAcrossLoci).hasSize(2).contains(RA_04, RA_05);
    assertThat(riskAllelesAcrossLoci).extracting(RiskAllele::getRiskFrequency).containsNull();
    assertThat(riskAllelesAcrossLoci)
            .extracting("riskAlleleName", "riskFrequency", "snp.rsId")
            .contains(tuple("rs456-T", null, "rs456"), tuple("rs678-?", null, "rs678"));
    assertThat(riskAllelesAcrossLoci).extracting(RiskAllele::getSnp).containsExactly(SNP_04, SNP_05);
}
Also used : Association(uk.ac.ebi.spot.goci.model.Association) Gene(uk.ac.ebi.spot.goci.model.Gene) RiskAllele(uk.ac.ebi.spot.goci.model.RiskAllele) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 5 with Gene

use of uk.ac.ebi.spot.goci.model.Gene in project goci by EBISPOT.

the class SnpGenomicContextMappingService method createOrRetrieveEnsemblExternalId.

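/**
     * Method to retrieve the Ensembl gene entry for an Ensembl gene ID, creating and saving a new entry if none
     * exists yet. If the existing entry is linked to a gene with a different name, the ID is removed from that gene.
     *
     * @param id       Ensembl gene ID
     * @param geneName Gene name allows method to check if this id is actually already linked to another gene
     * @return the existing or newly created Ensembl gene entry
     */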
private EnsemblGene createOrRetrieveEnsemblExternalId(String id, String geneName) {
    EnsemblGene existingEntry = ensemblGeneQueryService.findByEnsemblGeneId(id);

    // Unknown ID: create new entry in ENSEMBL_GENE table and return it straight away
    if (existingEntry == null) {
        EnsemblGene newEntry = new EnsemblGene();
        newEntry.setEnsemblGeneId(id);
        ensemblGeneRepository.save(newEntry);
        return newEntry;
    }

    // Known ID: check it is not linked to a gene with a different name
    Gene linkedGene = existingEntry.getGene();
    boolean linkedToDifferentName = linkedGene != null && !Objects.equals(linkedGene.getGeneName(), geneName);
    if (linkedToDifferentName) {
        getLog().warn("Ensembl ID: " + id + ", is already used in database by a different gene(s): " + linkedGene.getGeneName() + ". Will update so links to " + geneName);
        // Detach the ensembl ID from the gene it is currently linked to
        linkedGene.getEnsemblGeneIds().remove(existingEntry);
        geneRepository.save(linkedGene);
    }
    return existingEntry;
}
Also used : EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) Gene(uk.ac.ebi.spot.goci.model.Gene) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene)

Aggregations

Gene (uk.ac.ebi.spot.goci.model.Gene)21 ArrayList (java.util.ArrayList)16 RiskAllele (uk.ac.ebi.spot.goci.model.RiskAllele)11 SingleNucleotidePolymorphism (uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism)9 Locus (uk.ac.ebi.spot.goci.model.Locus)8 Association (uk.ac.ebi.spot.goci.model.Association)6 EnsemblGene (uk.ac.ebi.spot.goci.model.EnsemblGene)6 EntrezGene (uk.ac.ebi.spot.goci.model.EntrezGene)6 GenomicContext (uk.ac.ebi.spot.goci.model.GenomicContext)5 Location (uk.ac.ebi.spot.goci.model.Location)4 Test (org.junit.Test)3 HashSet (java.util.HashSet)2 SnpFormColumn (uk.ac.ebi.spot.goci.curation.model.SnpFormColumn)2 SnpFormRow (uk.ac.ebi.spot.goci.curation.model.SnpFormRow)2 SnpMappingForm (uk.ac.ebi.spot.goci.curation.model.SnpMappingForm)2 DateFormat (java.text.DateFormat)1 SimpleDateFormat (java.text.SimpleDateFormat)1 Collection (java.util.Collection)1 HashMap (java.util.HashMap)1 Set (java.util.Set)1