Search in sources :

Example 1 with EnsemblGene

use of uk.ac.ebi.spot.goci.model.EnsemblGene in project goci by EBISPOT.

the class EnsemblMappingPipeline method addGenomicContext.

/**
 * Create GenomicContext objects from the JSONObjects and register each of them on the current mapping result via
 * {@code getEnsemblMappingResult().addGenomicContext(...)}.
 * <p>
 * For non-overlap types ("upstream"/"downstream") the gene list is walked twice: a first pass selects the closest
 * gene, a second pass builds one GenomicContext per gene and flags the one selected by the first pass. Genes whose
 * name is already recorded as overlapping the variant (for the given source) are skipped in both passes.
 *
 * @param json_gene_list the list of genes in JSONObject format
 * @param snp_location   an instance of the Location class (chromosome name and position)
 * @param source         the source of the data (Ensembl or NCBI)
 * @param type           the type of genomic context (i.e. overlap, upstream, downstream)
 * @return true if a closest gene has been found; always false for the "overlap" type because no closest-gene
 * search is performed in that case
 * @throws IllegalArgumentException if the current mapping result carries no RS ID
 */
private boolean addGenomicContext(JSONArray json_gene_list, Location snp_location, String source, String type) {
    String closest_gene = "";
    int closest_distance = 0;
    boolean intergenic = !type.equals("overlap");
    boolean upstream = type.equals("upstream");
    boolean downstream = type.equals("downstream");
    Integer position = snp_location.getChromosomePosition();
    // Validate the RS ID before building anything that depends on it
    String rs_id = getEnsemblMappingResult().getRsId();
    if (rs_id == null) {
        throw new IllegalArgumentException("error, no RS ID found for location " + snp_location.getId());
    }
    SingleNucleotidePolymorphism snp_tmp = new SingleNucleotidePolymorphism();
    snp_tmp.setRsId(rs_id);
    // Get closest gene
    if (intergenic) {
        int pos = position;
        for (int i = 0; i < json_gene_list.length(); ++i) {
            JSONObject json_gene = json_gene_list.getJSONObject(i);
            String gene_name = json_gene.getString("external_name");
            boolean from_ncbi = source.equals(getNcbiSource());
            // If the source is NCBI, we parse the ID from the description:
            String gene_id = from_ncbi ? parseNCBIid(json_gene.getString("description"), gene_name) : json_gene.getString("id");
            // Skip unnamed genes and genes already recorded as overlapping the variant
            if (from_ncbi) {
                if (gene_name == null || getEnsemblMappingResult().getNcbiOverlappingGene().contains(gene_name)) {
                    continue;
                }
            } else {
                if (gene_name == null || getEnsemblMappingResult().getEnsemblOverlappingGene().contains(gene_name)) {
                    continue;
                }
            }
            int distance = 0;
            if (upstream) {
                distance = pos - json_gene.getInt("end");
            } else if (downstream) {
                distance = json_gene.getInt("start") - pos;
            }
            // NOTE(review): 0 doubles as the "nothing selected yet" sentinel, so the first non-skipped gene is
            // always accepted, even with a non-positive distance; confirm this is intended.
            if ((distance < closest_distance && distance > 0) || closest_distance == 0) {
                closest_gene = gene_id;
                closest_distance = distance;
            }
        }
    }
    for (int i = 0; i < json_gene_list.length(); ++i) {
        JSONObject json_gene = json_gene_list.getJSONObject(i);
        String gene_name = json_gene.getString("external_name");
        boolean from_ncbi = source.equals(getNcbiSource());
        // If the source is NCBI, we parse the ID from the description:
        String gene_id = from_ncbi ? parseNCBIid(json_gene.getString("description"), gene_name) : json_gene.getString("id");
        // NOTE(review): these compare against hard-coded labels while the ID parsing above uses
        // getNcbiSource(); presumably both agree, verify against the source constants.
        String ncbi_id = source.equals("NCBI") ? gene_id : null;
        String ensembl_id = source.equals("Ensembl") ? gene_id : null;
        int distance = 0;
        if (intergenic) {
            // Skip unnamed genes and genes already recorded as overlapping the variant
            if (from_ncbi) {
                if (gene_name == null || getEnsemblMappingResult().getNcbiOverlappingGene().contains(gene_name)) {
                    continue;
                }
            } else {
                if (gene_name == null || getEnsemblMappingResult().getEnsemblOverlappingGene().contains(gene_name)) {
                    continue;
                }
            }
            int pos = position;
            if (upstream) {
                distance = pos - json_gene.getInt("end");
            } else if (downstream) {
                distance = json_gene.getInt("start") - pos;
            }
        }
        Long dist = (long) distance;
        EntrezGene entrezGene = new EntrezGene();
        entrezGene.setEntrezGeneId(ncbi_id);
        Collection<EntrezGene> entrezGenes = new ArrayList<>();
        entrezGenes.add(entrezGene);
        EnsemblGene ensemblGene = new EnsemblGene();
        ensemblGene.setEnsemblGeneId(ensembl_id);
        Collection<EnsemblGene> ensemblGenes = new ArrayList<>();
        ensemblGenes.add(ensemblGene);
        Gene gene_object = new Gene(gene_name, entrezGenes, ensemblGenes);
        // Check if the gene corresponds to the closest gene. The emptiness test compares content, not
        // references: an empty ID read from the JSON must not count as a closest gene.
        boolean is_closest_gene = !closest_gene.isEmpty() && closest_gene.equals(gene_id);
        GenomicContext gc = new GenomicContext(intergenic, upstream, downstream, dist, snp_tmp, gene_object, snp_location, source, getMappingMethod(), is_closest_gene);
        getEnsemblMappingResult().addGenomicContext(gc);
    }
    return !closest_gene.isEmpty();
}
Also used : EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) ArrayList(java.util.ArrayList) GenomicContext(uk.ac.ebi.spot.goci.model.GenomicContext) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) JSONObject(org.json.JSONObject) EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) Gene(uk.ac.ebi.spot.goci.model.Gene) SingleNucleotidePolymorphism(uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism)

Example 2 with EnsemblGene

use of uk.ac.ebi.spot.goci.model.EnsemblGene in project goci by EBISPOT.

the class SnpGenomicContextMappingService method createOrRetrieveEnsemblExternalId.

/**
 * Looks up the Ensembl gene record for the given ID, creating and saving a new record when none exists.
 * <p>
 * When an existing record is linked to a gene whose name differs from {@code geneName}, a warning is logged, the
 * record is removed from that gene's Ensembl ID collection and the gene is saved.
 *
 * @param id       Ensembl gene ID
 * @param geneName Gene name allows method to check if this id is actually already linked to another gene
 * @return the existing record for {@code id}, or the newly created one
 */
private EnsemblGene createOrRetrieveEnsemblExternalId(String id, String geneName) {
    EnsemblGene found = ensemblGeneQueryService.findByEnsemblGeneId(id);

    // Unknown ID: create a new entry in ENSEMBL_GENE table and hand it back
    if (found == null) {
        EnsemblGene created = new EnsemblGene();
        created.setEnsemblGeneId(id);
        ensemblGeneRepository.save(created);
        return created;
    }

    // Known ID: nothing to do unless it is linked to a gene with a different name
    Gene linkedGene = found.getGene();
    if (linkedGene == null || Objects.equals(linkedGene.getGeneName(), geneName)) {
        return found;
    }

    getLog().warn("Ensembl ID: " + id + ", is already used in database by a different gene(s): " + linkedGene.getGeneName() + ". Will update so links to " + geneName);
    // Detach the Ensembl ID from the gene it was previously linked to
    linkedGene.getEnsemblGeneIds().remove(found);
    geneRepository.save(linkedGene);
    return found;
}
Also used : EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) Gene(uk.ac.ebi.spot.goci.model.Gene) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene)

Example 3 with EnsemblGene

use of uk.ac.ebi.spot.goci.model.EnsemblGene in project goci by EBISPOT.

the class SnpGenomicContextMappingService method createGene.

/**
 * Builds a new, unsaved gene carrying the given name and the external IDs that match the source.
 * <p>
 * The gene itself is not persisted here; it is only returned to the caller. The external ID records are resolved
 * through {@code createOrRetrieveEnsemblExternalId} / {@code createOrRetrieveEntrezExternalId}.
 *
 * @param geneName    gene symbol or name
 * @param externalIds external gene IDs
 * @param source      the source of mapping, either Ensembl or Entrez (compared case-insensitively); any other
 *                    value yields a gene without external IDs
 * @return the newly built gene
 */
private Gene createGene(String geneName, Set<String> externalIds, String source) {
    Gene gene = new Gene();
    gene.setGeneName(geneName);

    if (source.equalsIgnoreCase("Ensembl")) {
        // Attach one Ensembl record per external ID
        Collection<EnsemblGene> ensemblRecords = new ArrayList<>();
        for (String externalId : externalIds) {
            ensemblRecords.add(createOrRetrieveEnsemblExternalId(externalId, geneName));
        }
        gene.setEnsemblGeneIds(ensemblRecords);
    } else if (source.equalsIgnoreCase("Entrez")) {
        // Attach one Entrez record per external ID
        Collection<EntrezGene> entrezRecords = new ArrayList<>();
        for (String externalId : externalIds) {
            entrezRecords.add(createOrRetrieveEntrezExternalId(externalId, geneName));
        }
        gene.setEntrezGeneIds(entrezRecords);
    }

    getLog().debug("Creating " + source + " gene, with name " + geneName);
    return gene;
}
Also used : EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) Gene(uk.ac.ebi.spot.goci.model.Gene) EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) ArrayList(java.util.ArrayList) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene)

Example 4 with EnsemblGene

use of uk.ac.ebi.spot.goci.model.EnsemblGene in project goci by EBISPOT.

the class AssociationService method loadAssociatedData.

/**
 * Walks the data reachable from the given association (EFO traits, study, loci, risk alleles, SNPs, locations,
 * genomic contexts, proxy SNPs and author reported genes) inside a read-only transaction and writes a trace-level
 * summary of what was visited.
 * <p>
 * Nothing is returned; the collected sets are only used for the summary message.
 *
 * @param association the association whose linked data is traversed
 */
@Transactional(readOnly = true)
public void loadAssociatedData(Association association) {
    int traitCount = association.getEfoTraits().size();
    Study study = association.getStudy();
    AtomicInteger reportedGeneCount = new AtomicInteger();
    Collection<SingleNucleotidePolymorphism> snps = new HashSet<>();
    Collection<SingleNucleotidePolymorphism> proxySnps = new HashSet<>();
    Collection<Region> regions = new HashSet<>();
    Collection<Gene> mappedGenes = new HashSet<>();
    // Ensembl gene IDs seen so far, keyed by gene name
    Map<String, Set<String>> mappedGeneEnsemblIds = new HashMap<>();

    association.getLoci().forEach(locus -> {
        for (RiskAllele allele : locus.getStrongestRiskAlleles()) {
            SingleNucleotidePolymorphism snp = allele.getSnp();
            snp.getLocations().forEach(location -> regions.add(location.getRegion()));
            snp.getGenomicContexts().forEach(context -> {
                mappedGenes.add(context.getGene());
                String geneName = context.getGene().getGeneName();
                // Result unused; the call is retained from the original (presumably touches the association, confirm)
                Collection<EntrezGene> geneEntrezGeneIds = context.getGene().getEntrezGeneIds();
                Collection<EnsemblGene> geneEnsemblGeneIds = context.getGene().getEnsemblGeneIds();
                // Start a fresh ID set the first time a gene name is seen, then accumulate into it
                Set<String> idsForGene = mappedGeneEnsemblIds.computeIfAbsent(geneName, unused -> new HashSet<>());
                for (EnsemblGene ensemblGene : geneEnsemblGeneIds) {
                    idsForGene.add(ensemblGene.getEnsemblGeneId());
                }
            });
            snps.add(snp);
        }
        // Second pass over the same SNPs; a no-op for the set contents, kept to mirror the original traversal
        snps.addAll(locus.getStrongestRiskAlleles().stream().map(RiskAllele::getSnp).collect(Collectors.toList()));
        for (RiskAllele allele : locus.getStrongestRiskAlleles()) {
            if (allele.getProxySnps() != null) {
                proxySnps.addAll(allele.getProxySnps());
            }
        }
        reportedGeneCount.addAndGet(locus.getAuthorReportedGenes().size());
        // size() result is discarded; presumably the call forces the collection to be loaded, confirm
        locus.getAuthorReportedGenes().forEach(authorReportedGene -> authorReportedGene.getEnsemblGeneIds().size());
    });

    getLog().trace("Association '" + association.getId() + "' is mapped to " + traitCount
            + " EFO traits where study id = " + study.getId() + " (author reported " + reportedGeneCount
            + " gene(s)); this reports on " + snps.size() + " SNPs in " + regions.size()
            + " regions, mapped to " + mappedGenes.size() + " genes.");
}
Also used : Study(uk.ac.ebi.spot.goci.model.Study) HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) RiskAllele(uk.ac.ebi.spot.goci.model.RiskAllele) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) Gene(uk.ac.ebi.spot.goci.model.Gene) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SingleNucleotidePolymorphism(uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism) Region(uk.ac.ebi.spot.goci.model.Region) HashSet(java.util.HashSet) Location(uk.ac.ebi.spot.goci.model.Location) Transactional(org.springframework.transaction.annotation.Transactional)

Example 5 with EnsemblGene

use of uk.ac.ebi.spot.goci.model.EnsemblGene in project goci by EBISPOT.

the class SnpGenomicContextMappingService method cleanUpEnsemblGenes.

/**
 * Method to clean-up an Ensembl gene ID in database that has no linked gene.
 * <p>
 * Nothing is deleted here: a record without a linked gene is only appended to {@code ensemblGenesToDelete} so the
 * caller can remove it later. An ID for which the repository returns no record is ignored.
 *
 * @param id                   database ID of the Ensembl gene record to check
 * @param ensemblGenesToDelete accumulator that receives the record when it is not linked to any gene
 */
private void cleanUpEnsemblGenes(Long id, List<EnsemblGene> ensemblGenesToDelete) {
    // Find the Ensembl gene record with this ID
    EnsemblGene ensemblGene = ensemblGeneRepository.findOne(id);
    // The record may already be gone (findOne presumably returns null for an unknown ID, confirm against the
    // repository contract); there is then nothing to clean up, and dereferencing it would throw.
    if (ensemblGene == null) {
        return;
    }
    // If this ID is not linked to a gene then mark it for deletion
    if (ensemblGene.getGene() == null) {
        ensemblGenesToDelete.add(ensemblGene);
    }
}
Also used : EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) Gene(uk.ac.ebi.spot.goci.model.Gene) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene)

Aggregations

EnsemblGene (uk.ac.ebi.spot.goci.model.EnsemblGene)7 EntrezGene (uk.ac.ebi.spot.goci.model.EntrezGene)7 Gene (uk.ac.ebi.spot.goci.model.Gene)6 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 Set (java.util.Set)2 GenomicContext (uk.ac.ebi.spot.goci.model.GenomicContext)2 SingleNucleotidePolymorphism (uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 JSONObject (org.json.JSONObject)1 Transactional (org.springframework.transaction.annotation.Transactional)1 Location (uk.ac.ebi.spot.goci.model.Location)1 Region (uk.ac.ebi.spot.goci.model.Region)1 RiskAllele (uk.ac.ebi.spot.goci.model.RiskAllele)1 Study (uk.ac.ebi.spot.goci.model.Study)1