use of uk.ac.ebi.spot.goci.model.EntrezGene in project goci by EBISPOT.
the class EnsemblMappingPipeline method addGenomicContext.
/**
 * Builds one {@code GenomicContext} per gene in {@code json_gene_list} and registers each of them on the current
 * Ensembl mapping result (via {@code getEnsemblMappingResult().addGenomicContext(...)}).
 * <p>
 * Any {@code type} other than "overlap" is treated as intergenic. For intergenic contexts, genes that are already
 * recorded as overlapping the variant (for the given source) are skipped, the distance between the variant and
 * each remaining gene is computed, and the gene with the smallest positive distance is flagged as the closest
 * gene. If no remaining gene has a positive distance, the first remaining gene is used as the closest gene.
 *
 * @param json_gene_list the list of genes in JSONObject format (each entry is read for "external_name", "id" or
 *                       "description", and "start"/"end" for intergenic contexts)
 * @param snp_location   an instance of the Location class (chromosome name and position)
 * @param source         the source of the data (Ensembl or NCBI)
 * @param type           the type of genomic context (i.e. overlap, upstream, downstream)
 * @return boolean to indicate whether a closest gene has been found or not (only relevant for upstream and
 * downstream genes; always {@code false} for "overlap")
 * @throws IllegalArgumentException if the current mapping result has no RS ID
 */
private boolean addGenomicContext(JSONArray json_gene_list, Location snp_location, String source, String type) {
    boolean intergenic = !type.equals("overlap");
    boolean upstream = type.equals("upstream");
    boolean downstream = type.equals("downstream");
    Integer position = snp_location.getChromosomePosition();

    // Validate the RS ID before it is used to build the temporary SNP.
    String rs_id = getEnsemblMappingResult().getRsId();
    if (rs_id == null) {
        throw new IllegalArgumentException("error, no RS ID found for location " + snp_location.getId());
    }
    SingleNucleotidePolymorphism snp_tmp = new SingleNucleotidePolymorphism();
    snp_tmp.setRsId(rs_id);

    // The position is only needed (and only unboxed) for intergenic contexts.
    int pos = intergenic ? position : 0;

    // Get closest gene. An explicit flag records whether a candidate exists, so that neither an empty ID nor a
    // distance of 0 is mistaken for "nothing found yet".
    String closest_gene = "";
    int closest_distance = 0;
    boolean closest_found = false;
    if (intergenic) {
        for (int i = 0; i < json_gene_list.length(); ++i) {
            JSONObject json_gene = json_gene_list.getJSONObject(i);
            String gene_name = json_gene.getString("external_name");
            boolean is_ncbi = source.equals(getNcbiSource());
            // If the source is NCBI, we parse the ID from the description:
            String gene_id = is_ncbi
                    ? parseNCBIid(json_gene.getString("description"), gene_name)
                    : json_gene.getString("id");

            // Skip overlapping genes which also overlap upstream and/or downstream of the variant
            if (isKnownOverlappingGene(gene_name, is_ncbi)) {
                continue;
            }

            int distance = intergenicDistance(json_gene, pos, upstream, downstream);
            // A positive distance always beats a non-positive fallback; among positive distances the smallest wins.
            boolean better = distance > 0 && (closest_distance <= 0 || distance < closest_distance);
            if (!closest_found || better) {
                closest_gene = gene_id;
                closest_distance = distance;
                closest_found = true;
            }
        }
    }

    for (int i = 0; i < json_gene_list.length(); ++i) {
        JSONObject json_gene = json_gene_list.getJSONObject(i);
        String gene_name = json_gene.getString("external_name");
        boolean is_ncbi = source.equals(getNcbiSource());
        // If the source is NCBI, we parse the ID from the description:
        String gene_id = is_ncbi
                ? parseNCBIid(json_gene.getString("description"), gene_name)
                : json_gene.getString("id");
        String ncbi_id = source.equals("NCBI") ? gene_id : null;
        String ensembl_id = source.equals("Ensembl") ? gene_id : null;

        int distance = 0;
        if (intergenic) {
            // Skip overlapping genes which also overlap upstream and/or downstream of the variant
            if (isKnownOverlappingGene(gene_name, is_ncbi)) {
                continue;
            }
            distance = intergenicDistance(json_gene, pos, upstream, downstream);
        }
        Long dist = (long) distance;

        EntrezGene entrezGene = new EntrezGene();
        entrezGene.setEntrezGeneId(ncbi_id);
        Collection<EntrezGene> entrezGenes = new ArrayList<>();
        entrezGenes.add(entrezGene);

        EnsemblGene ensemblGene = new EnsemblGene();
        ensemblGene.setEnsemblGeneId(ensembl_id);
        Collection<EnsemblGene> ensemblGenes = new ArrayList<>();
        ensemblGenes.add(ensemblGene);

        Gene gene_object = new Gene(gene_name, entrezGenes, ensemblGenes);

        // Check if the gene corresponds to the closest gene (value comparison, never reference comparison)
        boolean is_closest_gene = closest_found && gene_id != null && gene_id.equals(closest_gene);

        GenomicContext gc = new GenomicContext(intergenic, upstream, downstream, dist, snp_tmp, gene_object, snp_location, source, getMappingMethod(), is_closest_gene);
        getEnsemblMappingResult().addGenomicContext(gc);
    }
    return closest_found;
}

/**
 * Tells whether a gene must be ignored for intergenic contexts: either it has no name, or its name is already
 * listed among the overlapping genes of the current mapping result for the given source.
 */
private boolean isKnownOverlappingGene(String gene_name, boolean is_ncbi) {
    if (gene_name == null) {
        return true;
    }
    return is_ncbi
            ? getEnsemblMappingResult().getNcbiOverlappingGene().contains(gene_name)
            : getEnsemblMappingResult().getEnsemblOverlappingGene().contains(gene_name);
}

/**
 * Computes the distance between the variant position and a gene: position minus gene "end" for upstream genes,
 * gene "start" minus position for downstream genes, and 0 for any other context type.
 */
private int intergenicDistance(JSONObject json_gene, int pos, boolean upstream, boolean downstream) {
    if (upstream) {
        return pos - json_gene.getInt("end");
    }
    if (downstream) {
        return json_gene.getInt("start") - pos;
    }
    return 0;
}
use of uk.ac.ebi.spot.goci.model.EntrezGene in project goci by EBISPOT.
the class SnpGenomicContextMappingService method createGene.
/**
 * Builds a new, in-memory {@code Gene} carrying the supplied name and the external IDs that belong to the given
 * source. The gene is returned without being saved by this method (the repository save call is disabled).
 *
 * @param geneName    gene symbol or name
 * @param externalIds external gene IDs
 * @param source      the source of mapping, either Ensembl or Entrez (compared case-insensitively)
 * @return the newly created gene
 */
private Gene createGene(String geneName, Set<String> externalIds, String source) {
    Gene gene = new Gene();
    gene.setGeneName(geneName);

    if (source.equalsIgnoreCase("Ensembl")) {
        // Attach one Ensembl external ID record per supplied ID
        Collection<EnsemblGene> linkedEnsemblGenes = new ArrayList<>();
        externalIds.forEach(externalId ->
                linkedEnsemblGenes.add(createOrRetrieveEnsemblExternalId(externalId, geneName)));
        gene.setEnsemblGeneIds(linkedEnsemblGenes);
    }
    else if (source.equalsIgnoreCase("Entrez")) {
        // Attach one Entrez external ID record per supplied ID
        Collection<EntrezGene> linkedEntrezGenes = new ArrayList<>();
        externalIds.forEach(externalId ->
                linkedEntrezGenes.add(createOrRetrieveEntrezExternalId(externalId, geneName)));
        gene.setEntrezGeneIds(linkedEntrezGenes);
    }

    getLog().debug("Creating " + source + " gene, with name " + geneName);
    return gene;
}
use of uk.ac.ebi.spot.goci.model.EntrezGene in project goci by EBISPOT.
the class SnpGenomicContextMappingService method cleanUpEntrezGenes.
/**
 * Checks whether an Entrez gene ID record is still linked to a gene and, if it is not, queues it for deletion.
 * Nothing is deleted here: orphaned records are only appended to {@code entrezGenesToDelete}.
 *
 * @param id                  database ID of the Entrez gene ID record to check
 * @param entrezGenesToDelete list that receives the record when it has no linked gene
 */
private void cleanUpEntrezGenes(Long id, List<EntrezGene> entrezGenesToDelete) {
    // Look up the Entrez ID record
    EntrezGene entrezGene = entrezGeneRepository.findOne(id);
    if (entrezGene == null) {
        // No record is stored under this ID (e.g. it has already been removed), so there is nothing to clean up
        return;
    }

    // If this ID is not linked to a gene then mark it for deletion
    if (entrezGene.getGene() == null) {
        entrezGenesToDelete.add(entrezGene);
    }
}
use of uk.ac.ebi.spot.goci.model.EntrezGene in project goci by EBISPOT.
the class AssociationService method loadAssociatedData.
/**
 * Traverses the data reachable from an association (loci, risk alleles, SNPs, locations, genomic contexts,
 * proxy SNPs and author reported genes) inside a read-only transaction, and writes a trace-level summary of what
 * was visited.
 * NOTE(review): the traversal presumably exists to initialise lazily loaded relations - confirm against the
 * entity mappings.
 *
 * @param association the association whose related data is traversed
 */
@Transactional(readOnly = true)
public void loadAssociatedData(Association association) {
    final int efoTraitTotal = association.getEfoTraits().size();
    final Study owningStudy = association.getStudy();
    final AtomicInteger authorReportedTotal = new AtomicInteger();

    final Collection<SingleNucleotidePolymorphism> seenSnps = new HashSet<>();
    final Collection<SingleNucleotidePolymorphism> seenProxySnps = new HashSet<>();
    final Collection<Region> seenRegions = new HashSet<>();
    final Collection<Gene> seenGenes = new HashSet<>();
    final Map<String, Set<String>> ensemblIdsByGeneName = new HashMap<>();

    association.getLoci().forEach(locus -> {
        locus.getStrongestRiskAlleles().stream().map(RiskAllele::getSnp).forEach(snp -> {
            // Collect the region of every location of this SNP
            snp.getLocations().forEach(location -> seenRegions.add(location.getRegion()));

            snp.getGenomicContexts().forEach(context -> {
                Gene contextGene = context.getGene();
                seenGenes.add(contextGene);
                String symbol = contextGene.getGeneName();

                // Fetched but not used afterwards.
                // NOTE(review): possibly kept to touch the relation - confirm before removing.
                Collection<EntrezGene> entrezIdsOfGene = contextGene.getEntrezGeneIds();
                Collection<EnsemblGene> ensemblIdsOfGene = contextGene.getEnsemblGeneIds();

                // Start a new ID set the first time a gene name is seen, otherwise extend the existing one
                Set<String> idsForSymbol = ensemblIdsByGeneName.computeIfAbsent(symbol, key -> new HashSet<>());
                for (EnsemblGene ensemblRecord : ensemblIdsOfGene) {
                    idsForSymbol.add(ensemblRecord.getEnsemblGeneId());
                }
            });

            seenSnps.add(snp);
        });

        locus.getStrongestRiskAlleles().stream().map(RiskAllele::getSnp).forEach(seenSnps::add);

        for (RiskAllele riskAllele : locus.getStrongestRiskAlleles()) {
            if (riskAllele.getProxySnps() == null) {
                continue;
            }
            seenProxySnps.addAll(riskAllele.getProxySnps());
        }

        authorReportedTotal.addAndGet(locus.getAuthorReportedGenes().size());
        // The size is requested for each author reported gene's Ensembl IDs; the result itself is discarded
        locus.getAuthorReportedGenes().forEach(reportedGene -> reportedGene.getEnsemblGeneIds().size());
    });

    getLog().trace("Association '" + association.getId() + "' is mapped to " + "" + efoTraitTotal
                           + " EFO traits where study id = " + owningStudy.getId() + " "
                           + "(author reported " + authorReportedTotal + " gene(s)); "
                           + "this reports on " + seenSnps.size() + " SNPs in " + seenRegions.size() + " regions, "
                           + "mapped to " + seenGenes.size() + " genes.");
}
use of uk.ac.ebi.spot.goci.model.EntrezGene in project goci by EBISPOT.
the class SnpGenomicContextMappingService method processGenes.
/**
 * Flattens the gene information held by a collection of genomic contexts into two lookups, gene name to Ensembl
 * IDs and gene name to Entrez IDs, and hands each non-empty lookup to {@code storeGenes} together with its
 * source label.
 *
 * @param genomicContexts object holding gene and snp mapping information
 */
private void processGenes(Collection<GenomicContext> genomicContexts) {
    getLog().debug("Processing genes...");

    Map<String, Set<String>> ensemblIdsBySymbol = new HashMap<>();
    Map<String, Set<String>> entrezIdsBySymbol = new HashMap<>();

    for (GenomicContext context : genomicContexts) {
        Gene gene = context.getGene();
        String symbol = gene.getGeneName().trim();

        // Contexts whose gene is named "undefined" (any letter case) contribute nothing
        if (symbol.equalsIgnoreCase("undefined")) {
            continue;
        }

        // Ensembl IDs: null IDs are ignored, everything else is grouped under the gene name
        for (EnsemblGene ensemblRecord : gene.getEnsemblGeneIds()) {
            String ensemblId = ensemblRecord.getEnsemblGeneId();
            if (ensemblId != null) {
                ensemblIdsBySymbol.computeIfAbsent(symbol, key -> new HashSet<>()).add(ensemblId);
            }
        }

        // Entrez IDs: same grouping as above
        for (EntrezGene entrezRecord : gene.getEntrezGeneIds()) {
            String entrezId = entrezRecord.getEntrezGeneId();
            if (entrezId != null) {
                entrezIdsBySymbol.computeIfAbsent(symbol, key -> new HashSet<>()).add(entrezId);
            }
        }
    }

    // The source label is passed along with each lookup when storing
    if (!ensemblIdsBySymbol.isEmpty()) {
        storeGenes(ensemblIdsBySymbol, "Ensembl");
    }
    if (!entrezIdsBySymbol.isEmpty()) {
        storeGenes(entrezIdsBySymbol, "Entrez");
    }
}
Aggregations