use of uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism in project goci by EBISPOT.
the class EnsemblMappingPipeline method addGenomicContext.
/**
 * Creates a GenomicContext for every gene in the given JSON list and adds each one to the current
 * Ensembl mapping result.
 *
 * For the non-overlap types (upstream / downstream) the list is first scanned to pick a closest gene;
 * genes whose name is null or is already recorded as overlapping for the given source are skipped in
 * both passes.
 *
 * @param json_gene_list the list of genes in JSONObject format
 * @param snp_location   an instance of the Location class (chromosome name and position)
 * @param source         the source of the data (Ensembl or NCBI)
 * @param type           the type of genomic context (i.e. overlap, upstream, downstream)
 * @return boolean to indicate whether a closest gene has been found or not (only relevant for upstream and
 * downstream gene)
 * @throws IllegalArgumentException if the current mapping result carries no RS ID
 */
private boolean addGenomicContext(JSONArray json_gene_list, Location snp_location, String source, String type) {
    boolean intergenic = !type.equals("overlap");
    boolean upstream = type.equals("upstream");
    boolean downstream = type.equals("downstream");
    Integer position = snp_location.getChromosomePosition();

    // Validate the RS ID before it is copied onto the temporary SNP.
    String rs_id = getEnsemblMappingResult().getRsId();
    if (rs_id == null) {
        throw new IllegalArgumentException("error, no RS ID found for location " + snp_location.getId());
    }
    SingleNucleotidePolymorphism snp_tmp = new SingleNucleotidePolymorphism();
    snp_tmp.setRsId(rs_id);

    // First pass: get closest gene (upstream / downstream only)
    String closest_gene = "";
    if (intergenic) {
        int pos = position;
        int closest_distance = 0;
        for (int i = 0; i < json_gene_list.length(); ++i) {
            JSONObject json_gene = json_gene_list.getJSONObject(i);
            String gene_name = json_gene.getString("external_name");
            String gene_id = resolveGeneId(json_gene, source, gene_name);
            if (isSkippedOverlappingGene(source, gene_name)) {
                continue;
            }
            int distance = distanceToGene(json_gene, type, pos);
            // NOTE(review): the first candidate is accepted whatever its distance (closest_distance == 0);
            // a first candidate with a negative distance would then never be replaced - confirm this is intended.
            if ((distance < closest_distance && distance > 0) || closest_distance == 0) {
                closest_gene = gene_id;
                closest_distance = distance;
            }
        }
    }

    // Second pass: build one GenomicContext per retained gene
    for (int i = 0; i < json_gene_list.length(); ++i) {
        JSONObject json_gene = json_gene_list.getJSONObject(i);
        String gene_name = json_gene.getString("external_name");
        String gene_id = resolveGeneId(json_gene, source, gene_name);
        String ncbi_id = (source.equals("NCBI")) ? gene_id : null;
        String ensembl_id = (source.equals("Ensembl")) ? gene_id : null;

        int distance = 0;
        if (intergenic) {
            if (isSkippedOverlappingGene(source, gene_name)) {
                continue;
            }
            distance = distanceToGene(json_gene, type, position);
        }
        Long dist = (long) distance;

        EntrezGene entrezGene = new EntrezGene();
        entrezGene.setEntrezGeneId(ncbi_id);
        Collection<EntrezGene> entrezGenes = new ArrayList<>();
        entrezGenes.add(entrezGene);

        EnsemblGene ensemblGene = new EnsemblGene();
        ensemblGene.setEnsemblGeneId(ensembl_id);
        Collection<EnsemblGene> ensemblGenes = new ArrayList<>();
        ensemblGenes.add(ensemblGene);

        Gene gene_object = new Gene(gene_name, entrezGenes, ensemblGenes);

        // Check if the gene corresponds to the closest gene (compare content, not references)
        boolean is_closest_gene = !closest_gene.isEmpty() && closest_gene.equals(gene_id);

        GenomicContext gc = new GenomicContext(intergenic, upstream, downstream, dist, snp_tmp, gene_object,
                                               snp_location, source, getMappingMethod(), is_closest_gene);
        getEnsemblMappingResult().addGenomicContext(gc);
    }
    return !closest_gene.isEmpty();
}

/**
 * Returns the gene identifier for a JSON gene: parsed from the description when the source is the NCBI
 * source, otherwise read from the "id" field.
 */
private String resolveGeneId(JSONObject json_gene, String source, String gene_name) {
    return source.equals(getNcbiSource())
            ? parseNCBIid(json_gene.getString("description"), gene_name)
            : json_gene.getString("id");
}

/**
 * Tells whether a gene must be skipped for upstream/downstream processing: its name is null, or it is
 * already listed among the overlapping genes recorded for the given source.
 */
private boolean isSkippedOverlappingGene(String source, String gene_name) {
    if (source.equals(getNcbiSource())) {
        return gene_name == null || getEnsemblMappingResult().getNcbiOverlappingGene().contains(gene_name);
    }
    return gene_name == null || getEnsemblMappingResult().getEnsemblOverlappingGene().contains(gene_name);
}

/**
 * Computes the distance between the variant position and a gene: position minus gene end for "upstream",
 * gene start minus position for "downstream", 0 for any other type.
 */
private int distanceToGene(JSONObject json_gene, String type, int pos) {
    if (type.equals("upstream")) {
        return pos - json_gene.getInt("end");
    }
    if (type.equals("downstream")) {
        return json_gene.getInt("start") - pos;
    }
    return 0;
}
use of uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism in project goci by EBISPOT.
the class SnpGenomicContextMappingService method storeSnpGenomicContext.
/**
 * Saves genomic context information to database.
 *
 * For every RS ID in the map the matching SNP is looked up (exact match first, then ignoring case), a new
 * genomic context is created for each mapped context whose gene name is not "undefined", and the SNP's
 * genomic contexts and last update date are replaced. All updated SNPs are saved in one call at the end.
 *
 * @param snpToGenomicContextMap map of rs_id and all genomic context details returned from current mapping run
 * @throws RuntimeException if an RS ID in the map has no corresponding SNP in the database
 */
private void storeSnpGenomicContext(Map<String, Set<GenomicContext>> snpToGenomicContextMap) {
    List<SingleNucleotidePolymorphism> updatedSnps = new ArrayList<>();

    // Go through each rs_id and its associated genomic contexts returned from the mapping pipeline
    for (Map.Entry<String, Set<GenomicContext>> entry : snpToGenomicContextMap.entrySet()) {
        String snpRsId = entry.getKey();
        Set<GenomicContext> genomicContextsFromMapping = entry.getValue();
        getLog().debug("Storing genomic context for " + snpRsId);

        // Check if the SNP exists
        SingleNucleotidePolymorphism snpInDatabase = singleNucleotidePolymorphismRepository.findByRsId(snpRsId);
        if (snpInDatabase == null) {
            snpInDatabase = singleNucleotidePolymorphismQueryService.findByRsIdIgnoreCase(snpRsId);
        }

        if (snpInDatabase == null) {
            // SNP doesn't exist, this should be extremely rare as SNP value is a copy
            // of the variant entered by the curator which
            // by the time mapping is started should already have been saved
            // TODO WHAT WILL HAPPEN FOR MERGED SNPS
            getLog().error("Adding genomic context for SNP not found in database, RS_ID:" + snpRsId);
            throw new RuntimeException("Adding genomic context for SNP not found in database, RS_ID: " + snpRsId);
        }

        Collection<GenomicContext> newSnpGenomicContexts = new ArrayList<>();
        for (GenomicContext genomicContextFromMapping : genomicContextsFromMapping) {
            // Gene should already have been created
            String geneName = genomicContextFromMapping.getGene().getGeneName().trim();
            if (geneName.equalsIgnoreCase("undefined")) {
                getLog().warn("Gene name returned from mapping pipeline is 'undefined' for SNP "
                                      + snpInDatabase.getRsId());
                continue;
            }

            // Create new genomic context
            Boolean isIntergenic = genomicContextFromMapping.getIsIntergenic();
            Boolean isUpstream = genomicContextFromMapping.getIsUpstream();
            Boolean isDownstream = genomicContextFromMapping.getIsDownstream();
            Long distance = genomicContextFromMapping.getDistance();
            String source = genomicContextFromMapping.getSource();
            String mappingMethod = genomicContextFromMapping.getMappingMethod();
            Boolean isClosestGene = genomicContextFromMapping.getIsClosestGene();

            // Location details
            String chromosomeName = genomicContextFromMapping.getLocation().getChromosomeName();
            Integer chromosomePosition = genomicContextFromMapping.getLocation().getChromosomePosition();
            Region regionFromMapping = genomicContextFromMapping.getLocation().getRegion();
            // A location without a region (or with an unnamed region) is looked up with a null region name
            String regionName = null;
            if (regionFromMapping != null && regionFromMapping.getName() != null) {
                regionName = regionFromMapping.getName().trim();
            }

            // Check if location already exists
            Location location = locationRepository.findByChromosomeNameAndChromosomePositionAndRegionName(
                    chromosomeName, chromosomePosition, regionName);
            if (location == null) {
                location = locationCreationService.createLocation(chromosomeName, chromosomePosition, regionName);
            }

            GenomicContext genomicContext = genomicContextCreationService.createGenomicContext(
                    isIntergenic, isUpstream, isDownstream, distance, source, mappingMethod, geneName,
                    snpInDatabase, isClosestGene, location);
            newSnpGenomicContexts.add(genomicContext);
        }

        // Save latest mapped information
        snpInDatabase.setGenomicContexts(newSnpGenomicContexts);
        // Update the last update date
        snpInDatabase.setLastUpdateDate(new Date());
        // Collected here and saved in a single call once every rs_id has been processed
        updatedSnps.add(snpInDatabase);
    }
    singleNucleotidePolymorphismRepository.save(updatedSnps);
}
use of uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism in project goci by EBISPOT.
the class AssociationService method loadAssociatedData.
/**
 * Walks the data reachable from an association (EFO traits, study, loci, risk alleles, SNPs, their
 * locations and genomic contexts, proxy SNPs and author reported genes) and writes a trace-level summary.
 * Nothing is returned; presumably the traversal exists to touch lazily loaded relations inside the
 * read-only transaction - TODO confirm.
 *
 * @param association the association whose related data is traversed
 */
@Transactional(readOnly = true)
public void loadAssociatedData(Association association) {
    int traitCount = association.getEfoTraits().size();
    Study study = association.getStudy();
    AtomicInteger reportedGeneCount = new AtomicInteger();

    Collection<SingleNucleotidePolymorphism> snps = new HashSet<>();
    Collection<SingleNucleotidePolymorphism> proxySnps = new HashSet<>();
    Collection<Region> regions = new HashSet<>();
    Collection<Gene> mappedGenes = new HashSet<>();
    Map<String, Set<String>> mappedGeneEnsemblIds = new HashMap<>();

    association.getLoci().forEach(locus -> {
        // Regions and mapped genes of every strongest-risk-allele SNP
        for (RiskAllele allele : locus.getStrongestRiskAlleles()) {
            SingleNucleotidePolymorphism snp = allele.getSnp();
            for (Location snpLocation : snp.getLocations()) {
                regions.add(snpLocation.getRegion());
            }
            snp.getGenomicContexts().forEach(context -> {
                Gene gene = context.getGene();
                mappedGenes.add(gene);
                String geneName = gene.getGeneName();
                Collection<EntrezGene> entrezIdsOfGene = gene.getEntrezGeneIds();
                Collection<EnsemblGene> ensemblIdsOfGene = gene.getEnsemblGeneIds();
                // Group the Ensembl ids by gene name, creating the set on first sight of the name
                Set<String> idsForName = mappedGeneEnsemblIds.computeIfAbsent(geneName, key -> new HashSet<>());
                for (EnsemblGene ensemblGene : ensemblIdsOfGene) {
                    idsForName.add(ensemblGene.getEnsemblGeneId());
                }
            });
            snps.add(snp);
        }

        // Second pass over the same risk alleles, as in the original traversal
        for (RiskAllele allele : locus.getStrongestRiskAlleles()) {
            snps.add(allele.getSnp());
        }

        for (RiskAllele allele : locus.getStrongestRiskAlleles()) {
            if (allele.getProxySnps() != null) {
                proxySnps.addAll(allele.getProxySnps());
            }
        }

        reportedGeneCount.addAndGet(locus.getAuthorReportedGenes().size());
        locus.getAuthorReportedGenes().forEach(reportedGene -> reportedGene.getEnsemblGeneIds().size());
    });

    getLog().trace("Association '" + association.getId() + "' is mapped to " + traitCount
                           + " EFO traits where study id = " + study.getId()
                           + " (author reported " + reportedGeneCount + " gene(s)); this reports on "
                           + snps.size() + " SNPs in " + regions.size() + " regions, mapped to "
                           + mappedGenes.size() + " genes.");
}
use of uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism in project goci by EBISPOT.
the class FilteringTest method testFilter.
/**
 * Checks that filters built with refine(...).on(...) record the filtered type, the filtered method and
 * the filter value (or range) for a String value, a Float value and a date range, and that two date
 * filters built from the same inputs share a hash code.
 */
@Test
public void testFilter() {
    SingleNucleotidePolymorphism template = template(SingleNucleotidePolymorphism.class);
    Filter<SingleNucleotidePolymorphism, String> filter =
            refine(template).on(template.getRsId()).hasValue("rs123456");
    assertEquals("Filter type does not match expected", SingleNucleotidePolymorphism.class, filter.getFilteredType());
    assertEquals("Filtered method does not match expected", "getRsId", filter.getFilteredMethod().getName());
    assertEquals("Filtered value does not match expected", "rs123456", filter.getFilteredValues().get(0));

    Association template2 = template(Association.class);
    Filter<Association, Float> filter2 =
            refine(template2).on(template2.getPvalueMantissa()).hasValue(Float.valueOf("10"));
    assertEquals("Filter type does not match expected", Association.class, filter2.getFilteredType());
    assertEquals("Filtered method does not match expected", "getPvalueMantissa", filter2.getFilteredMethod().getName());
    assertEquals(Float.valueOf("10"), filter2.getFilteredValues().get(0), 0.0d);

    DateFormat df1 = new SimpleDateFormat("yyyy-MM-dd");
    DateFormat df2 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.S");
    Date from;
    Date to;
    try {
        from = df1.parse("2005-01-01");
        to = df1.parse("2010-01-01");
    } catch (ParseException e) {
        // Fail here with the real cause instead of a later NullPointerException in format(null)
        throw new AssertionError("Could not parse the date fixtures used by the test", e);
    }
    String fromValue = df2.format(from);
    String toValue = df2.format(to);

    Publication publication = template(Publication.class);
    Filter dateFilter = refine(publication).on(publication.getPublicationDate()).hasRange(fromValue, toValue);
    Filter dateFilter2 = refine(publication).on(publication.getPublicationDate()).hasRange(fromValue, toValue);
    assertEquals("Filter type does not match expected", Publication.class, dateFilter.getFilteredType());
    assertEquals("Filtered method does not match expected", "getPublicationDate", dateFilter.getFilteredMethod().getName());
    assertEquals("Filtered value does not match expected", "2010-01-01T00:00:00.0", dateFilter.getFilteredRange().to());
    assertEquals("Hashcodes of the two date filters differ", dateFilter.hashCode(), dateFilter2.hashCode());
}
use of uk.ac.ebi.spot.goci.model.SingleNucleotidePolymorphism in project goci by EBISPOT.
the class AssociationAttributeService method createSnp.
/**
 * Builds a new SingleNucleotidePolymorphism whose RS ID is the curator-entered value after it has been
 * passed through StringProcessingService.tidy_curator_entered_string.
 *
 * @param curatorEnteredSNP the SNP identifier as entered by the curator
 * @return a newly constructed SNP carrying the tidied RS ID
 */
public SingleNucleotidePolymorphism createSnp(String curatorEnteredSNP) {
    String tidiedRsId = StringProcessingService.tidy_curator_entered_string(curatorEnteredSNP);
    SingleNucleotidePolymorphism newSnp = new SingleNucleotidePolymorphism();
    newSnp.setRsId(tidiedRsId);
    return newSnp;
}
Aggregations