Search in sources:

Example 1 with SingleNucleotidePolymorphism

use of uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism in project goci by EBISPOT.

the class EnsemblMappingPipeline method addGenomicContext.

/**
 * Creates a GenomicContext for every gene in {@code json_gene_list} and adds it to the current Ensembl mapping
 * result.
 * <p>
 * For the "upstream" and "downstream" types a first pass over the list picks the closest gene; a second pass then
 * builds the contexts and flags the one belonging to that gene. Genes already recorded as overlapping the variant
 * (and genes without a name) are skipped for those two types.
 *
 * @param json_gene_list the list of genes in JSONObject format
 * @param snp_location   an instance of the Location class (chromosome name and position)
 * @param source         the source of the data (Ensembl or NCBI)
 * @param type           the type of genomic context (i.e. overlap, upstream, downstream)
 * @return boolean to indicate whether a closest gene has been found or not (only relevant for upstream and
 * downstream gene)
 * @throws IllegalArgumentException if the current mapping result has no RS ID
 */
private boolean addGenomicContext(JSONArray json_gene_list, Location snp_location, String source, String type) {
    // Validate the RS ID before it is used for anything.
    String rs_id = getEnsemblMappingResult().getRsId();
    if (rs_id == null) {
        throw new IllegalArgumentException("error, no RS ID found for location " + snp_location.getId());
    }
    boolean intergenic = !type.equals("overlap");
    boolean upstream = type.equals("upstream");
    boolean downstream = type.equals("downstream");
    boolean from_ncbi = source.equals(getNcbiSource());
    SingleNucleotidePolymorphism snp_tmp = new SingleNucleotidePolymorphism();
    snp_tmp.setRsId(rs_id);
    // The position is only unboxed for intergenic contexts; overlap contexts always carry a distance of 0.
    int pos = intergenic ? snp_location.getChromosomePosition() : 0;

    // First pass: get closest gene (a closest_distance of 0 means "nothing selected yet")
    String closest_gene = "";
    int closest_distance = 0;
    if (intergenic) {
        for (int i = 0; i < json_gene_list.length(); ++i) {
            JSONObject json_gene = json_gene_list.getJSONObject(i);
            String gene_name = json_gene.getString("external_name");
            String gene_id = geneIdFor(json_gene, gene_name, from_ncbi);
            if (overlapsVariant(gene_name, from_ncbi)) {
                // Skip overlapping genes which also overlap upstream and/or downstream of the variant
                continue;
            }
            int distance = distanceFromVariant(json_gene, pos, type);
            if (closest_distance == 0 || (distance > 0 && distance < closest_distance)) {
                closest_gene = gene_id;
                closest_distance = distance;
            }
        }
    }

    // Second pass: build one genomic context per retained gene
    for (int i = 0; i < json_gene_list.length(); ++i) {
        JSONObject json_gene = json_gene_list.getJSONObject(i);
        String gene_name = json_gene.getString("external_name");
        String gene_id = geneIdFor(json_gene, gene_name, from_ncbi);
        // NOTE(review): these compare against literals while the ID lookup uses getNcbiSource() - confirm they agree
        String ncbi_id = source.equals("NCBI") ? gene_id : null;
        String ensembl_id = source.equals("Ensembl") ? gene_id : null;
        int distance = 0;
        if (intergenic) {
            if (overlapsVariant(gene_name, from_ncbi)) {
                // Skip overlapping genes which also overlap upstream and/or downstream of the variant
                continue;
            }
            distance = distanceFromVariant(json_gene, pos, type);
        }
        Long dist = (long) distance;
        EntrezGene entrezGene = new EntrezGene();
        entrezGene.setEntrezGeneId(ncbi_id);
        Collection<EntrezGene> entrezGenes = new ArrayList<>();
        entrezGenes.add(entrezGene);
        EnsemblGene ensemblGene = new EnsemblGene();
        ensemblGene.setEnsemblGeneId(ensembl_id);
        Collection<EnsemblGene> ensemblGenes = new ArrayList<>();
        ensemblGenes.add(ensemblGene);
        Gene gene_object = new Gene(gene_name, entrezGenes, ensemblGenes);
        // Check if the gene corresponds to the closest gene (compare content, not references)
        boolean is_closest_gene = !closest_gene.isEmpty() && closest_gene.equals(gene_id);
        GenomicContext gc = new GenomicContext(intergenic, upstream, downstream, dist, snp_tmp, gene_object, snp_location, source, getMappingMethod(), is_closest_gene);
        getEnsemblMappingResult().addGenomicContext(gc);
    }
    return !closest_gene.isEmpty();
}

/** Returns the gene ID: parsed from the description when the source is NCBI, otherwise the "id" field. */
private String geneIdFor(JSONObject json_gene, String gene_name, boolean from_ncbi) {
    return from_ncbi ? parseNCBIid(json_gene.getString("description"), gene_name) : json_gene.getString("id");
}

/** Returns true when the gene has no name or is already listed among the overlapping genes for the source. */
private boolean overlapsVariant(String gene_name, boolean from_ncbi) {
    if (gene_name == null) {
        return true;
    }
    return from_ncbi
            ? getEnsemblMappingResult().getNcbiOverlappingGene().contains(gene_name)
            : getEnsemblMappingResult().getEnsemblOverlappingGene().contains(gene_name);
}

/** Returns the distance between the variant position and the gene for upstream/downstream types, else 0. */
private int distanceFromVariant(JSONObject json_gene, int pos, String type) {
    if (type.equals("upstream")) {
        return pos - json_gene.getInt("end");
    }
    if (type.equals("downstream")) {
        return json_gene.getInt("start") - pos;
    }
    return 0;
}
Also used : EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) ArrayList(java.util.ArrayList) GenomicContext(uk.ac.ebi.spot.goci.model.GenomicContext) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) JSONObject(org.json.JSONObject) EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) Gene(uk.ac.ebi.spot.goci.model.Gene) SingleNucleotidePolymorphism(uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism)

Example 2 with SingleNucleotidePolymorphism

use of uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism in project goci by EBISPOT.

the class SnpGenomicContextMappingService method storeSnpGenomicContext.

/**
 * Saves genomic context information to database.
 * <p>
 * For every RS ID in the map the matching SNP is looked up (exact match first, then case-insensitive), a fresh
 * collection of genomic contexts is created from the mapping results and set on the SNP, and the SNP's last update
 * date is refreshed. All updated SNPs are saved in a single repository call at the end.
 *
 * @param snpToGenomicContextMap map of rs_id and all genomic context details returned from current mapping run
 * @throws RuntimeException if an RS ID from the map cannot be found in the database
 */
private void storeSnpGenomicContext(Map<String, Set<GenomicContext>> snpToGenomicContextMap) {
    List<SingleNucleotidePolymorphism> updatedSnps = new ArrayList<>();
    // Go through each rs_id and its associated genomic contexts returned from the mapping pipeline
    for (Map.Entry<String, Set<GenomicContext>> entry : snpToGenomicContextMap.entrySet()) {
        String snpRsId = entry.getKey();
        Set<GenomicContext> genomicContextsFromMapping = entry.getValue();
        getLog().debug("Storing genomic context for " + snpRsId);

        // Check if the SNP exists
        SingleNucleotidePolymorphism snpInDatabase = singleNucleotidePolymorphismRepository.findByRsId(snpRsId);
        if (snpInDatabase == null) {
            snpInDatabase = singleNucleotidePolymorphismQueryService.findByRsIdIgnoreCase(snpRsId);
        }
        if (snpInDatabase == null) {
            // SNP doesn't exist, this should be extremely rare as SNP value is a copy of the variant entered by
            // the curator which by the time mapping is started should already have been saved
            // TODO WHAT WILL HAPPEN FOR MERGED SNPS
            getLog().error("Adding genomic context for SNP not found in database, RS_ID:" + snpRsId);
            throw new RuntimeException("Adding genomic context for SNP not found in database, RS_ID: " + snpRsId);
        }

        Collection<GenomicContext> newSnpGenomicContexts = new ArrayList<>();
        for (GenomicContext genomicContextFromMapping : genomicContextsFromMapping) {
            // Gene should already have been created
            String geneName = genomicContextFromMapping.getGene().getGeneName().trim();
            if (geneName.equalsIgnoreCase("undefined")) {
                getLog().warn("Gene name returned from mapping pipeline is 'undefined' for SNP " + snpInDatabase.getRsId());
                continue;
            }
            // Create new genomic context
            Boolean isIntergenic = genomicContextFromMapping.getIsIntergenic();
            Boolean isUpstream = genomicContextFromMapping.getIsUpstream();
            Boolean isDownstream = genomicContextFromMapping.getIsDownstream();
            Long distance = genomicContextFromMapping.getDistance();
            String source = genomicContextFromMapping.getSource();
            String mappingMethod = genomicContextFromMapping.getMappingMethod();
            Boolean isClosestGene = genomicContextFromMapping.getIsClosestGene();
            // Location details
            String chromosomeName = genomicContextFromMapping.getLocation().getChromosomeName();
            Integer chromosomePosition = genomicContextFromMapping.getLocation().getChromosomePosition();
            Region regionFromMapping = genomicContextFromMapping.getLocation().getRegion();
            // A location without a region (or with an unnamed region) is looked up with a null region name
            String regionName = null;
            if (regionFromMapping != null && regionFromMapping.getName() != null) {
                regionName = regionFromMapping.getName().trim();
            }
            // Check if location already exists
            Location location = locationRepository.findByChromosomeNameAndChromosomePositionAndRegionName(chromosomeName, chromosomePosition, regionName);
            if (location == null) {
                location = locationCreationService.createLocation(chromosomeName, chromosomePosition, regionName);
            }
            GenomicContext genomicContext = genomicContextCreationService.createGenomicContext(isIntergenic, isUpstream, isDownstream, distance, source, mappingMethod, geneName, snpInDatabase, isClosestGene, location);
            newSnpGenomicContexts.add(genomicContext);
        }
        // Save latest mapped information
        snpInDatabase.setGenomicContexts(newSnpGenomicContexts);
        // Update the last update date
        snpInDatabase.setLastUpdateDate(new Date());
        updatedSnps.add(snpInDatabase);
    }
    // Persist all updated SNPs in one call rather than one save per SNP
    singleNucleotidePolymorphismRepository.save(updatedSnps);
}
Also used : ArrayList(java.util.ArrayList) GenomicContext(uk.ac.ebi.spot.goci.model.GenomicContext) Date(java.util.Date) SingleNucleotidePolymorphism(uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism) Region(uk.ac.ebi.spot.goci.model.Region) Location(uk.ac.ebi.spot.goci.model.Location)

Example 3 with SingleNucleotidePolymorphism

use of uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism in project goci by EBISPOT.

the class AssociationService method loadAssociatedData.

/**
 * Walks the object graph hanging off an association (loci, risk alleles, SNPs, locations, genomic contexts, genes
 * and author reported genes) and writes a trace-level summary of what was found.
 * <p>
 * NOTE(review): the getter and {@code size()} calls whose results are discarded presumably exist to initialise
 * lazily loaded collections inside the read-only transaction - confirm against the entity mappings.
 *
 * @param association the association whose related data is traversed
 */
@Transactional(readOnly = true)
public void loadAssociatedData(Association association) {
    int traitCount = association.getEfoTraits().size();
    Study study = association.getStudy();
    AtomicInteger reportedGeneCount = new AtomicInteger();
    Collection<SingleNucleotidePolymorphism> snps = new HashSet<>();
    Collection<SingleNucleotidePolymorphism> proxySnps = new HashSet<>();
    Collection<Region> regions = new HashSet<>();
    Collection<Gene> mappedGenes = new HashSet<>();
    // Ensembl gene IDs collected per gene name
    Map<String, Set<String>> mappedGeneEnsemblIds = new HashMap<>();

    association.getLoci().forEach(locus -> {
        // Pass 1: for each risk allele SNP gather regions, mapped genes and their Ensembl IDs
        locus.getStrongestRiskAlleles().forEach(riskAllele -> {
            SingleNucleotidePolymorphism snp = riskAllele.getSnp();
            snp.getLocations().forEach(location -> regions.add(location.getRegion()));
            snp.getGenomicContexts().forEach(context -> {
                mappedGenes.add(context.getGene());
                Gene gene = context.getGene();
                String geneName = gene.getGeneName();
                // Entrez IDs are fetched but not otherwise used here
                gene.getEntrezGeneIds();
                Collection<EnsemblGene> ensemblGenes = gene.getEnsemblGeneIds();
                // Start a new ID set the first time a gene name is seen, otherwise extend the existing one
                Set<String> ensemblIds = mappedGeneEnsemblIds.computeIfAbsent(geneName, name -> new HashSet<>());
                for (EnsemblGene ensemblGene : ensemblGenes) {
                    ensemblIds.add(ensemblGene.getEnsemblGeneId());
                }
            });
            snps.add(snp);
        });

        // Pass 2: record every risk allele SNP again (no-op for a set) and collect proxy SNPs when present
        locus.getStrongestRiskAlleles().forEach(riskAllele -> {
            snps.add(riskAllele.getSnp());
            if (riskAllele.getProxySnps() != null) {
                proxySnps.addAll(riskAllele.getProxySnps());
            }
        });

        reportedGeneCount.addAndGet(locus.getAuthorReportedGenes().size());
        // Result discarded on purpose; only the access to the collection matters
        locus.getAuthorReportedGenes().forEach(authorReportedGene -> authorReportedGene.getEnsemblGeneIds().size());
    });

    getLog().trace("Association '" + association.getId() + "' is mapped to " + traitCount
                           + " EFO traits where study id = " + study.getId()
                           + " (author reported " + reportedGeneCount + " gene(s)); this reports on "
                           + snps.size() + " SNPs in " + regions.size() + " regions, mapped to "
                           + mappedGenes.size() + " genes.");
}
Also used : Study(uk.ac.ebi.spot.goci.model.Study) HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) RiskAllele(uk.ac.ebi.spot.goci.model.RiskAllele) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) EntrezGene(uk.ac.ebi.spot.goci.model.EntrezGene) EnsemblGene(uk.ac.ebi.spot.goci.model.EnsemblGene) Gene(uk.ac.ebi.spot.goci.model.Gene) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SingleNucleotidePolymorphism(uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism) Region(uk.ac.ebi.spot.goci.model.Region) HashSet(java.util.HashSet) Location(uk.ac.ebi.spot.goci.model.Location) Transactional(org.springframework.transaction.annotation.Transactional)

Example 4 with SingleNucleotidePolymorphism

use of uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism in project goci by EBISPOT.

the class FilteringTest method testFilter.

/**
 * Verifies that filters built through the template/refine DSL expose the expected filtered type, filtered method
 * and filtered value (or range), and that two identically built date range filters share the same hash code.
 */
@Test
public void testFilter() {
    // Single-value filter on a String property
    SingleNucleotidePolymorphism template = template(SingleNucleotidePolymorphism.class);
    Filter<SingleNucleotidePolymorphism, String> filter = refine(template).on(template.getRsId()).hasValue("rs123456");
    assertEquals("Filter type does not match expected", SingleNucleotidePolymorphism.class, filter.getFilteredType());
    assertEquals("Filtered method does not match expected", "getRsId", filter.getFilteredMethod().getName());
    assertEquals("Filtered value does not match expected", "rs123456", filter.getFilteredValues().get(0));

    // Single-value filter on a Float property
    Association template2 = template(Association.class);
    Filter<Association, Float> filter2 = refine(template2).on(template2.getPvalueMantissa()).hasValue(Float.valueOf("10"));
    assertEquals("Filter type does not match expected", Association.class, filter2.getFilteredType());
    assertEquals("Filtered method does not match expected", "getPvalueMantissa", filter2.getFilteredMethod().getName());
    assertEquals(Float.valueOf("10"), filter2.getFilteredValues().get(0), 0.0d);

    // Range filter on a date property; bounds are parsed as plain dates and re-formatted as timestamps
    DateFormat df1 = new SimpleDateFormat("yyyy-MM-dd");
    DateFormat df2 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.S");
    Date from;
    Date to;
    try {
        from = df1.parse("2005-01-01");
        to = df1.parse("2010-01-01");
    } catch (ParseException e) {
        // Fail here with the real cause instead of hitting a NullPointerException when formatting below
        throw new AssertionError("Could not parse the date range used by the test", e);
    }
    String fromValue = df2.format(from);
    String toValue = df2.format(to);
    Publication publication = template(Publication.class);
    Filter dateFilter = refine(publication).on(publication.getPublicationDate()).hasRange(fromValue, toValue);
    Filter dateFilter2 = refine(publication).on(publication.getPublicationDate()).hasRange(fromValue, toValue);
    assertEquals("Filter type does not match expected", Publication.class, dateFilter.getFilteredType());
    assertEquals("Filtered method does not match expected", "getPublicationDate", dateFilter.getFilteredMethod().getName());
    assertEquals("Filtered value does not match expected", "2010-01-01T00:00:00.0", dateFilter.getFilteredRange().to());
    assertEquals("Hashcodes of the two date filters differ", dateFilter.hashCode(), dateFilter2.hashCode());
}
Also used : Association(uk.ac.ebi.spot.goci.model.Association) SimpleDateFormat(java.text.SimpleDateFormat) DateFormat(java.text.DateFormat) SingleNucleotidePolymorphism(uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism) Publication(uk.ac.ebi.spot.goci.model.Publication) ParseException(java.text.ParseException) SimpleDateFormat(java.text.SimpleDateFormat) Date(java.util.Date) Test(org.junit.Test)

Example 5 with SingleNucleotidePolymorphism

use of uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism in project goci by EBISPOT.

the class AssociationAttributeService method createSnp.

/**
 * Builds a new SingleNucleotidePolymorphism whose RS ID is the curator-entered value after it has been passed
 * through {@code StringProcessingService.tidy_curator_entered_string}.
 *
 * @param curatorEnteredSNP the SNP identifier as entered by the curator
 * @return a new SingleNucleotidePolymorphism with only its RS ID set
 */
public SingleNucleotidePolymorphism createSnp(String curatorEnteredSNP) {
    String tidiedRsId = StringProcessingService.tidy_curator_entered_string(curatorEnteredSNP);
    SingleNucleotidePolymorphism createdSnp = new SingleNucleotidePolymorphism();
    createdSnp.setRsId(tidiedRsId);
    return createdSnp;
}
Also used : SingleNucleotidePolymorphism(uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism)

Aggregations

SingleNucleotidePolymorphism (uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism): 24; ArrayList (java.util.ArrayList): 15; RiskAllele (uk.ac.ebi.spot.goci.model.RiskAllele): 12; Association (uk.ac.ebi.spot.goci.model.Association): 10; Gene (uk.ac.ebi.spot.goci.model.Gene): 8; Locus (uk.ac.ebi.spot.goci.model.Locus): 7; Location (uk.ac.ebi.spot.goci.model.Location): 5; Date (java.util.Date): 4; GenomicContext (uk.ac.ebi.spot.goci.model.GenomicContext): 4; Study (uk.ac.ebi.spot.goci.model.Study): 4; DateFormat (java.text.DateFormat): 3; SimpleDateFormat (java.text.SimpleDateFormat): 3; Test (org.junit.Test): 3; EfoTrait (uk.ac.ebi.spot.goci.model.EfoTrait): 3; Region (uk.ac.ebi.spot.goci.model.Region): 3; ParseException (java.text.ParseException): 2; Collection (java.util.Collection): 2; Collectors (java.util.stream.Collectors): 2; Autowired (org.springframework.beans.factory.annotation.Autowired): 2; Service (org.springframework.stereotype.Service): 2