use of uk.ac.ebi.spot.goci.model.EnsemblGene in project goci by EBISPOT.
the class EnsemblMappingPipeline method addGenomicContext.
/**
 * Create GenomicContext objects from the JSONObjects and register each of them on the current mapping result
 * (via {@code getEnsemblMappingResult().addGenomicContext(...)}).
 * <p>
 * For intergenic contexts (any type other than "overlap") the gene list is scanned twice: a first pass picks the
 * closest gene, a second pass builds one GenomicContext per gene and flags the one matching the closest gene.
 * Genes without a name, or whose name is already recorded as overlapping the variant for the given source, are
 * skipped in both passes.
 *
 * @param json_gene_list the list of overlapping genes in JSONObject format
 * @param snp_location   an instance of the Location class (chromosome name and position)
 * @param source         the source of the data (Ensembl or NCBI)
 * @param type           the type of genomic context (i.e. overlap, upstream, downstream)
 * @return boolean to indicate whether a closest gene has been found or not (only relevant for upstream and
 * downstream gene)
 * @throws IllegalArgumentException if the current mapping result has no RS ID
 */
private boolean addGenomicContext(JSONArray json_gene_list, Location snp_location, String source, String type) {
    boolean intergenic = !type.equals("overlap");
    boolean upstream = type.equals("upstream");
    boolean downstream = type.equals("downstream");
    Integer position = snp_location.getChromosomePosition();

    // Validate the RS ID before it is copied anywhere
    String rs_id = getEnsemblMappingResult().getRsId();
    if (rs_id == null) {
        throw new IllegalArgumentException("error, no RS ID found for location " + snp_location.getId());
    }
    SingleNucleotidePolymorphism snp_tmp = new SingleNucleotidePolymorphism();
    snp_tmp.setRsId(rs_id);

    // Get closest gene
    String closest_gene = "";
    int closest_distance = 0;
    if (intergenic) {
        int pos = position;
        for (int i = 0; i < json_gene_list.length(); ++i) {
            JSONObject json_gene = json_gene_list.getJSONObject(i);
            String gene_name = json_gene.getString("external_name");
            String gene_id = extractGeneId(json_gene, gene_name, source);
            if (shouldSkipIntergenicGene(gene_name, source)) {
                continue;
            }
            int distance = distanceToGene(json_gene, type, pos);
            // While closest_distance is still 0 any gene is accepted as the candidate; afterwards a gene only
            // replaces it with a strictly smaller, strictly positive distance.
            // NOTE(review): if the first accepted candidate has a negative distance it can never be replaced,
            // because no later distance can be both below it and above 0 - confirm this cannot happen upstream.
            if ((distance < closest_distance && distance > 0) || closest_distance == 0) {
                closest_gene = gene_id;
                closest_distance = distance;
            }
        }
    }

    for (int i = 0; i < json_gene_list.length(); ++i) {
        JSONObject json_gene = json_gene_list.getJSONObject(i);
        String gene_name = json_gene.getString("external_name");
        String gene_id = extractGeneId(json_gene, gene_name, source);
        // NOTE(review): these compare against literals while the rest of the method uses getNcbiSource();
        // presumably both spell the same value - confirm.
        String ncbi_id = source.equals("NCBI") ? gene_id : null;
        String ensembl_id = source.equals("Ensembl") ? gene_id : null;

        // Overlapping genes keep a distance of 0
        int distance = 0;
        if (intergenic) {
            if (shouldSkipIntergenicGene(gene_name, source)) {
                continue;
            }
            distance = distanceToGene(json_gene, type, position);
        }
        Long dist = (long) distance;

        EntrezGene entrezGene = new EntrezGene();
        entrezGene.setEntrezGeneId(ncbi_id);
        Collection<EntrezGene> entrezGenes = new ArrayList<>();
        entrezGenes.add(entrezGene);

        EnsemblGene ensemblGene = new EnsemblGene();
        ensemblGene.setEnsemblGeneId(ensembl_id);
        Collection<EnsemblGene> ensemblGenes = new ArrayList<>();
        ensemblGenes.add(ensemblGene);

        Gene gene_object = new Gene(gene_name, entrezGenes, ensemblGenes);

        // Check if the gene corresponds to the closest gene. The emptiness test is done on the string content:
        // comparing with != "" only compares references and is true for any empty string that is not the literal.
        boolean is_closest_gene = !closest_gene.isEmpty() && closest_gene.equals(gene_id);

        GenomicContext gc = new GenomicContext(intergenic, upstream, downstream, dist, snp_tmp, gene_object, snp_location, source, getMappingMethod(), is_closest_gene);
        getEnsemblMappingResult().addGenomicContext(gc);
    }
    return !closest_gene.isEmpty();
}

/**
 * Returns the gene ID for a JSON gene entry: parsed from the "description" field when the source is the NCBI
 * source, otherwise read from the "id" field.
 */
private String extractGeneId(JSONObject json_gene, String gene_name, String source) {
    return source.equals(getNcbiSource())
            ? parseNCBIid(json_gene.getString("description"), gene_name)
            : json_gene.getString("id");
}

/**
 * Tells whether a gene must be ignored for an upstream/downstream context: it has no name, or its name is
 * already listed among the genes overlapping the variant for the given source.
 */
private boolean shouldSkipIntergenicGene(String gene_name, String source) {
    if (gene_name == null) {
        return true;
    }
    return source.equals(getNcbiSource())
            ? getEnsemblMappingResult().getNcbiOverlappingGene().contains(gene_name)
            : getEnsemblMappingResult().getEnsemblOverlappingGene().contains(gene_name);
}

/**
 * Computes the distance between the variant position and a gene: position minus gene "end" for upstream,
 * gene "start" minus position for downstream, 0 for any other type.
 */
private int distanceToGene(JSONObject json_gene, String type, int pos) {
    if (type.equals("upstream")) {
        return pos - json_gene.getInt("end");
    }
    if (type.equals("downstream")) {
        return json_gene.getInt("start") - pos;
    }
    return 0;
}
use of uk.ac.ebi.spot.goci.model.EnsemblGene in project goci by EBISPOT.
the class SnpGenomicContextMappingService method createOrRetrieveEnsemblExternalId.
/**
 * Looks up the Ensembl gene record for the given ID, creating and saving a new record when none is found.
 * When an existing record is linked to a gene whose name differs from {@code geneName}, the ID is removed from
 * that gene, the gene is saved, and a warning is logged.
 *
 * @param id       Ensembl gene ID
 * @param geneName Gene name allows method to check if this id is actually already linked to another gene
 * @return the existing or newly created Ensembl gene record
 */
private EnsemblGene createOrRetrieveEnsemblExternalId(String id, String geneName) {
    EnsemblGene storedRecord = ensemblGeneQueryService.findByEnsemblGeneId(id);

    // Unknown ID: add a new entry to the ENSEMBL_GENE table
    if (storedRecord == null) {
        EnsemblGene freshRecord = new EnsemblGene();
        freshRecord.setEnsemblGeneId(id);
        ensemblGeneRepository.save(freshRecord);
        return freshRecord;
    }

    // Known ID: nothing to do unless it is attached to a gene carrying another name
    Gene linkedGene = storedRecord.getGene();
    if (linkedGene == null || Objects.equals(linkedGene.getGeneName(), geneName)) {
        return storedRecord;
    }

    getLog().warn("Ensembl ID: " + id + ", is already used in database by a different gene(s): " + linkedGene.getGeneName() + ". Will update so links to " + geneName);

    // Detach the ID from the gene it was previously linked to
    linkedGene.getEnsemblGeneIds().remove(storedRecord);
    geneRepository.save(linkedGene);
    return storedRecord;
}
use of uk.ac.ebi.spot.goci.model.EnsemblGene in project goci by EBISPOT.
the class SnpGenomicContextMappingService method createGene.
/**
 * Builds a new gene with the given name and attaches its external IDs according to the mapping source.
 * The gene itself is not saved by this method; it is only returned to the caller.
 *
 * @param geneName    gene symbol or name
 * @param externalIds external gene IDs
 * @param source      the source of mapping, either Ensembl or Entrez (compared case-insensitively)
 * @return the newly built gene
 */
private Gene createGene(String geneName, Set<String> externalIds, String source) {
    Gene gene = new Gene();
    gene.setGeneName(geneName);

    if (source.equalsIgnoreCase("Ensembl")) {
        // Resolve every external ID to an Ensembl record and link it to the gene
        Collection<EnsemblGene> linkedEnsemblIds = new ArrayList<>();
        for (String externalId : externalIds) {
            linkedEnsemblIds.add(createOrRetrieveEnsemblExternalId(externalId, geneName));
        }
        gene.setEnsemblGeneIds(linkedEnsemblIds);
    }

    if (source.equalsIgnoreCase("Entrez")) {
        // Resolve every external ID to an Entrez record and link it to the gene
        Collection<EntrezGene> linkedEntrezIds = new ArrayList<>();
        for (String externalId : externalIds) {
            linkedEntrezIds.add(createOrRetrieveEntrezExternalId(externalId, geneName));
        }
        gene.setEntrezGeneIds(linkedEntrezIds);
    }

    getLog().debug("Creating " + source + " gene, with name " + geneName);
    return gene;
}
use of uk.ac.ebi.spot.goci.model.EnsemblGene in project goci by EBISPOT.
the class AssociationService method loadAssociatedData.
/**
 * Walks the data reachable from an association (EFO traits, study, loci, risk alleles, SNPs, locations,
 * genomic contexts, genes, proxy SNPs and author reported genes) and emits a trace-level summary of what was
 * found. Nothing is returned; the collections built here are only used for the log message.
 * NOTE(review): several getter results are deliberately unused - presumably the calls exist to touch
 * lazily-loaded associations inside the read-only transaction; confirm before removing any of them.
 *
 * @param association the association whose related data is traversed
 */
@Transactional(readOnly = true)
public void loadAssociatedData(Association association) {
    int traitCount = association.getEfoTraits().size();
    Study study = association.getStudy();

    AtomicInteger reportedGeneCount = new AtomicInteger();
    Collection<SingleNucleotidePolymorphism> snps = new HashSet<>();
    Collection<SingleNucleotidePolymorphism> proxySnps = new HashSet<>();
    Collection<Region> regions = new HashSet<>();
    Collection<Gene> mappedGenes = new HashSet<>();
    Map<String, Set<String>> ensemblIdsByGeneName = new HashMap<>();

    association.getLoci().forEach(locus -> {
        // First pass: regions, mapped genes and their Ensembl IDs for every risk-allele SNP
        for (RiskAllele riskAllele : locus.getStrongestRiskAlleles()) {
            SingleNucleotidePolymorphism snp = riskAllele.getSnp();

            for (Location location : snp.getLocations()) {
                regions.add(location.getRegion());
            }

            snp.getGenomicContexts().forEach(context -> {
                mappedGenes.add(context.getGene());
                String geneName = context.getGene().getGeneName();
                // Return value unused; call retained from the original implementation
                context.getGene().getEntrezGeneIds();
                Collection<EnsemblGene> ensemblGenes = context.getGene().getEnsemblGeneIds();

                // One ID set per gene name, created the first time the name is seen
                Set<String> idsForGene = ensemblIdsByGeneName.computeIfAbsent(geneName, name -> new HashSet<>());
                for (EnsemblGene ensemblGene : ensemblGenes) {
                    idsForGene.add(ensemblGene.getEnsemblGeneId());
                }
            });

            snps.add(snp);
        }

        // Second pass re-adds the same SNPs; a no-op for the set, kept to mirror the original call sequence
        for (RiskAllele riskAllele : locus.getStrongestRiskAlleles()) {
            snps.add(riskAllele.getSnp());
        }

        // Third pass: proxy SNPs, when a risk allele has any
        for (RiskAllele riskAllele : locus.getStrongestRiskAlleles()) {
            if (riskAllele.getProxySnps() == null) {
                continue;
            }
            proxySnps.addAll(riskAllele.getProxySnps());
        }

        reportedGeneCount.addAndGet(locus.getAuthorReportedGenes().size());
        locus.getAuthorReportedGenes().forEach(authorReportedGene -> {
            // Size is discarded; only the access to the collection matters here
            authorReportedGene.getEnsemblGeneIds().size();
        });
    });

    getLog().trace("Association '" + association.getId() + "' is mapped to " + traitCount
                           + " EFO traits where study id = " + study.getId()
                           + " (author reported " + reportedGeneCount + " gene(s)); this reports on "
                           + snps.size() + " SNPs in " + regions.size() + " regions, mapped to "
                           + mappedGenes.size() + " genes.");
}
use of uk.ac.ebi.spot.goci.model.EnsemblGene in project goci by EBISPOT.
the class SnpGenomicContextMappingService method cleanUpEnsemblGenes.
/**
 * Method to clean-up an Ensembl gene ID in database that has no linked gene. Nothing is deleted here: the
 * record is only appended to {@code ensemblGenesToDelete}, and removing it is left to the caller.
 *
 * @param id                   identifier used to look the Ensembl gene record up in the repository (presumably
 *                             its primary key - confirm against the repository definition)
 * @param ensemblGenesToDelete list that receives the record when no gene is linked to it
 */
private void cleanUpEnsemblGenes(Long id, List<EnsemblGene> ensemblGenesToDelete) {
    // Load the Ensembl gene record for this identifier
    EnsemblGene ensemblGene = ensemblGeneRepository.findOne(id);

    // The lookup can come back empty (e.g. the record is already gone); nothing to clean up then
    if (ensemblGene == null) {
        return;
    }

    // If this ID is not linked to a gene then mark it for deletion
    if (ensemblGene.getGene() == null) {
        ensemblGenesToDelete.add(ensemblGene);
    }
}
Aggregations