Search in sources :

Example 1 with EntrezGene

use of uk.ac.ebi.spot.goci.model.EntrezGene in project goci by EBISPOT.

the class EnsemblMappingPipeline method addGenomicContext.

/**
 * Create GenomicContext objects from the JSONObjects and add them to the current Ensembl mapping result.
 * <p>
 * For intergenic contexts (upstream/downstream) a first pass over the gene list picks the "closest" gene, and a
 * second pass builds one GenomicContext per gene that is not skipped, flagging the closest one. For the "overlap"
 * type no gene is skipped, every distance is 0 and no closest gene is ever selected.
 *
 * @param json_gene_list the list of genes in JSONObject format
 * @param snp_location   an instance of the Location class (chromosome name and position)
 * @param source         the source of the data (Ensembl or NCBI)
 * @param type           the type of genomic context (i.e. overlap, upstream, downstream)
 * @return boolean to indicate whether a closest gene has been found or not (only relevant for upstream and
 * downstream gene)
 * @throws IllegalArgumentException if the current mapping result has no RS ID
 */
private boolean addGenomicContext(JSONArray json_gene_list, Location snp_location, String source, String type) {
    String closest_gene = "";
    int closest_distance = 0;
    boolean intergenic = !type.equals("overlap");
    boolean upstream = type.equals("upstream");
    boolean downstream = type.equals("downstream");
    Integer position = snp_location.getChromosomePosition();
    // Validate the RS ID before it is used to build the temporary SNP
    if (getEnsemblMappingResult().getRsId() == null) {
        throw new IllegalArgumentException("error, no RS ID found for location " + snp_location.getId());
    }
    SingleNucleotidePolymorphism snp_tmp = new SingleNucleotidePolymorphism();
    snp_tmp.setRsId(getEnsemblMappingResult().getRsId());
    // Get closest gene
    if (intergenic) {
        int pos = position;
        for (int i = 0; i < json_gene_list.length(); ++i) {
            JSONObject json_gene = json_gene_list.getJSONObject(i);
            String gene_name = json_gene.getString("external_name");
            // If the source is NCBI, we parse the ID from the description:
            String gene_id = source.equals(getNcbiSource()) ? parseNCBIid(json_gene.getString("description"), gene_name) : json_gene.getString("id");
            if (isSkippedIntergenicGene(gene_name, source)) {
                // Skip overlapping genes which also overlap upstream and/or downstream of the variant
                continue;
            }
            int distance = intergenicDistance(json_gene, pos, type);
            // NOTE(review): closest_distance == 0 doubles as the "nothing selected yet" sentinel, so a first
            // candidate with a zero or negative distance is accepted as well -- confirm this is intended.
            if ((distance < closest_distance && distance > 0) || closest_distance == 0) {
                closest_gene = gene_id;
                closest_distance = distance;
            }
        }
    }
    for (int i = 0; i < json_gene_list.length(); ++i) {
        JSONObject json_gene = json_gene_list.getJSONObject(i);
        String gene_name = json_gene.getString("external_name");
        // If the source is NCBI, we parse the ID from the description:
        String gene_id = source.equals(getNcbiSource()) ? parseNCBIid(json_gene.getString("description"), gene_name) : json_gene.getString("id");
        String ncbi_id = (source.equals("NCBI")) ? gene_id : null;
        String ensembl_id = (source.equals("Ensembl")) ? gene_id : null;
        int distance = 0;
        if (intergenic) {
            if (isSkippedIntergenicGene(gene_name, source)) {
                // Skip overlapping genes which also overlap upstream and/or downstream of the variant
                continue;
            }
            distance = intergenicDistance(json_gene, position, type);
        }
        Long dist = (long) distance;
        EntrezGene entrezGene = new EntrezGene();
        entrezGene.setEntrezGeneId(ncbi_id);
        Collection<EntrezGene> entrezGenes = new ArrayList<>();
        entrezGenes.add(entrezGene);
        EnsemblGene ensemblGene = new EnsemblGene();
        ensemblGene.setEnsemblGeneId(ensembl_id);
        Collection<EnsemblGene> ensemblGenes = new ArrayList<>();
        ensemblGenes.add(ensemblGene);
        Gene gene_object = new Gene(gene_name, entrezGenes, ensemblGenes);
        // Check if the gene corresponds to the closest gene (compare string content, not references)
        boolean is_closest_gene = !closest_gene.isEmpty() && closest_gene.equals(gene_id);
        GenomicContext gc = new GenomicContext(intergenic, upstream, downstream, dist, snp_tmp, gene_object, snp_location, source, getMappingMethod(), is_closest_gene);
        getEnsemblMappingResult().addGenomicContext(gc);
    }
    return !closest_gene.isEmpty();
}

/**
 * Tells whether a gene must be left out of the upstream/downstream contexts: either it has no name, or its name is
 * already recorded among the overlapping genes of the current mapping result for the given source.
 */
private boolean isSkippedIntergenicGene(String gene_name, String source) {
    if (gene_name == null) {
        return true;
    }
    if (source.equals(getNcbiSource())) {
        return getEnsemblMappingResult().getNcbiOverlappingGene().contains(gene_name);
    }
    return getEnsemblMappingResult().getEnsemblOverlappingGene().contains(gene_name);
}

/**
 * Computes the distance between the variant position and a gene: position minus gene "end" for upstream,
 * gene "start" minus position for downstream, 0 for any other type.
 */
private int intergenicDistance(JSONObject json_gene, int pos, String type) {
    if (type.equals("upstream")) {
        return pos - json_gene.getInt("end");
    }
    if (type.equals("downstream")) {
        return json_gene.getInt("start") - pos;
    }
    return 0;
}
Also used : EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) ArrayList(java.util.ArrayList) GenomicContext(uk.ac.ebi.spot.goci.model.GenomicContext) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) JSONObject(org.json.JSONObject) EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) Gene(uk.ac.ebi.spot.goci.model.Gene) SingleNucleotidePolymorphism(uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism)

Example 2 with EntrezGene

use of uk.ac.ebi.spot.goci.model.EntrezGene in project goci by EBISPOT.

the class SnpGenomicContextMappingService method createGene.

/**
 * Builds a new, in-memory gene carrying the external IDs that belong to the given source.
 * <p>
 * The gene is only constructed and returned here; this method does not call any repository save itself.
 *
 * @param geneName    gene symbol or name
 * @param externalIds external gene IDs
 * @param source      the source of mapping, either Ensembl or Entrez (compared case-insensitively); any other
 *                    value yields a gene with only its name set
 * @return the newly created gene
 */
private Gene createGene(String geneName, Set<String> externalIds, String source) {
    Gene newGene = new Gene();
    newGene.setGeneName(geneName);
    if (source.equalsIgnoreCase("Ensembl")) {
        // Attach one Ensembl record per external ID
        Collection<EnsemblGene> linkedEnsemblGenes = new ArrayList<>();
        for (String externalId : externalIds) {
            linkedEnsemblGenes.add(createOrRetrieveEnsemblExternalId(externalId, geneName));
        }
        newGene.setEnsemblGeneIds(linkedEnsemblGenes);
    } else if (source.equalsIgnoreCase("Entrez")) {
        // Attach one Entrez record per external ID
        Collection<EntrezGene> linkedEntrezGenes = new ArrayList<>();
        for (String externalId : externalIds) {
            linkedEntrezGenes.add(createOrRetrieveEntrezExternalId(externalId, geneName));
        }
        newGene.setEntrezGeneIds(linkedEntrezGenes);
    }
    getLog().debug("Creating " + source + " gene, with name " + geneName);
    return newGene;
}
Also used : EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) Gene(uk.ac.ebi.spot.goci.model.Gene) EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) ArrayList(java.util.ArrayList) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene)

Example 3 with EntrezGene

use of uk.ac.ebi.spot.goci.model.EntrezGene in project goci by EBISPOT.

the class SnpGenomicContextMappingService method cleanUpEntrezGenes.

/**
 * Method to flag an Entrez gene ID for clean-up when it has no linked gene.
 * <p>
 * Nothing is deleted here: an orphaned record is only appended to {@code entrezGenesToDelete}. If no record
 * exists for the given ID there is nothing to clean up and the method returns without touching the list.
 *
 * @param id                  Entrez gene ID to check
 * @param entrezGenesToDelete list collecting the Entrez gene records that have no linked gene
 */
private void cleanUpEntrezGenes(Long id, List<EntrezGene> entrezGenesToDelete) {
    // Look up the record for this Entrez ID
    EntrezGene entrezGene = entrezGeneRepository.findOne(id);
    if (entrezGene == null) {
        // The lookup found nothing; avoid dereferencing a missing record
        return;
    }
    // If this ID is not linked to a gene then queue it for deletion
    if (entrezGene.getGene() == null) {
        entrezGenesToDelete.add(entrezGene);
    }
}
Also used : EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) Gene(uk.ac.ebi.spot.goci.model.Gene) EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene)

Example 4 with EntrezGene

use of uk.ac.ebi.spot.goci.model.EntrezGene in project goci by EBISPOT.

the class AssociationService method loadAssociatedData.

/**
 * Walks the data linked to an association (EFO traits, study, loci, risk alleles, SNPs, locations, genomic
 * contexts, genes and author reported genes) and writes a trace-level summary of what was found.
 * <p>
 * NOTE(review): the collected values are only used for the log message; presumably the real purpose is to touch
 * these relations while the read-only transaction is open -- confirm against the entity mappings.
 *
 * @param association the association whose linked data is traversed
 */
@Transactional(readOnly = true)
public void loadAssociatedData(Association association) {
    int traitCount = association.getEfoTraits().size();
    Study study = association.getStudy();
    AtomicInteger reportedGeneCount = new AtomicInteger();
    Collection<SingleNucleotidePolymorphism> snps = new HashSet<>();
    Collection<SingleNucleotidePolymorphism> proxySnps = new HashSet<>();
    Collection<Region> regions = new HashSet<>();
    Collection<Gene> mappedGenes = new HashSet<>();
    Map<String, Set<String>> mappedGeneEnsemblIds = new HashMap<>();
    association.getLoci().forEach(locus -> {
        for (RiskAllele strongestAllele : locus.getStrongestRiskAlleles()) {
            SingleNucleotidePolymorphism snp = strongestAllele.getSnp();
            // Record every region the SNP is located in
            snp.getLocations().forEach(location -> regions.add(location.getRegion()));
            snp.getGenomicContexts().forEach(context -> {
                Gene mappedGene = context.getGene();
                mappedGenes.add(mappedGene);
                String geneName = mappedGene.getGeneName();
                // The Entrez IDs are fetched but not otherwise used
                mappedGene.getEntrezGeneIds();
                Collection<EnsemblGene> ensemblGenes = mappedGene.getEnsemblGeneIds();
                // Group the Ensembl IDs under the gene name, creating the set the first time a name is seen
                Set<String> idsForGene = mappedGeneEnsemblIds.computeIfAbsent(geneName, key -> new HashSet<>());
                for (EnsemblGene ensemblGene : ensemblGenes) {
                    idsForGene.add(ensemblGene.getEnsemblGeneId());
                }
            });
            snps.add(snp);
        }
        locus.getStrongestRiskAlleles().forEach(riskAllele -> {
            snps.add(riskAllele.getSnp());
            if (riskAllele.getProxySnps() != null) {
                proxySnps.addAll(riskAllele.getProxySnps());
            }
        });
        reportedGeneCount.addAndGet(locus.getAuthorReportedGenes().size());
        // Ask each author reported gene for the size of its Ensembl ID collection; the value itself is discarded
        locus.getAuthorReportedGenes().forEach(authorReportedGene -> authorReportedGene.getEnsemblGeneIds().size());
    });
    getLog().trace("Association '" + association.getId() + "' is mapped to " + traitCount
                           + " EFO traits where study id = " + study.getId()
                           + " (author reported " + reportedGeneCount + " gene(s)); this reports on "
                           + snps.size() + " SNPs in " + regions.size() + " regions, mapped to "
                           + mappedGenes.size() + " genes.");
}
Also used : Study(uk.ac.ebi.spot.goci.model.Study) HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) RiskAllele(uk.ac.ebi.spot.goci.model.RiskAllele) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) Gene(uk.ac.ebi.spot.goci.model.Gene) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SingleNucleotidePolymorphism(uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism) Region(uk.ac.ebi.spot.goci.model.Region) HashSet(java.util.HashSet) Location(uk.ac.ebi.spot.goci.model.Location) Transactional(org.springframework.transaction.annotation.Transactional)

Example 5 with EntrezGene

use of uk.ac.ebi.spot.goci.model.EntrezGene in project goci by EBISPOT.

the class SnpGenomicContextMappingService method processGenes.

/**
 * Collects, per gene symbol, the Ensembl and Entrez IDs found in the supplied genomic contexts and passes each
 * non-empty grouping on to {@code storeGenes} together with its source label.
 * <p>
 * Contexts whose trimmed gene name equals "undefined" (ignoring case) are left out, as are null IDs.
 *
 * @param genomicContexts object holding gene and snp mapping information
 */
private void processGenes(Collection<GenomicContext> genomicContexts) {
    getLog().debug("Processing genes...");
    // Gene symbol -> every external ID seen for that symbol, one map per source
    Map<String, Set<String>> ensemblIdsBySymbol = new HashMap<>();
    Map<String, Set<String>> entrezIdsBySymbol = new HashMap<>();
    for (GenomicContext context : genomicContexts) {
        String symbol = context.getGene().getGeneName().trim();
        if (symbol.equalsIgnoreCase("undefined")) {
            // No usable gene for this context
            continue;
        }
        for (EnsemblGene ensemblGene : context.getGene().getEnsemblGeneIds()) {
            String ensemblId = ensemblGene.getEnsemblGeneId();
            if (ensemblId != null) {
                ensemblIdsBySymbol.computeIfAbsent(symbol, key -> new HashSet<>()).add(ensemblId);
            }
        }
        for (EntrezGene entrezGene : context.getGene().getEntrezGeneIds()) {
            String entrezId = entrezGene.getEntrezGeneId();
            if (entrezId != null) {
                entrezIdsBySymbol.computeIfAbsent(symbol, key -> new HashSet<>()).add(entrezId);
            }
        }
    }
    // Store genes, source is required so we know what table to add them to
    if (!ensemblIdsBySymbol.isEmpty()) {
        storeGenes(ensemblIdsBySymbol, "Ensembl");
    }
    if (!entrezIdsBySymbol.isEmpty()) {
        storeGenes(entrezIdsBySymbol, "Entrez");
    }
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) GenomicContext(uk.ac.ebi.spot.goci.model.GenomicContext) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) HashSet(java.util.HashSet)

Aggregations

EnsemblGene (uk.ac.ebi.spot.goci.model.EnsemblGene)7 EntrezGene (uk.ac.ebi.spot.goci.model.EntrezGene)7 Gene (uk.ac.ebi.spot.goci.model.Gene)6 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 Set (java.util.Set)2 GenomicContext (uk.ac.ebi.spot.goci.model.GenomicContext)2 SingleNucleotidePolymorphism (uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 JSONObject (org.json.JSONObject)1 Transactional (org.springframework.transaction.annotation.Transactional)1 Location (uk.ac.ebi.spot.goci.model.Location)1 Region (uk.ac.ebi.spot.goci.model.Region)1 RiskAllele (uk.ac.ebi.spot.goci.model.RiskAllele)1 Study (uk.ac.ebi.spot.goci.model.Study)1