use of ubic.gemma.model.genome.PhysicalLocation in project Gemma by PavlidisLab.
the class GeneCoreServiceTest method testSearchGenes.
/**
 * Persists a throw-away gene (symbol {@code "test_search"}) located on a newly persisted X chromosome of
 * the taxon found under the common name "human", then checks that
 * {@code geneCoreService.searchGenes("test_search", 1L)} yields a non-null first result.
 * <p>
 * The gene is removed in a {@code finally} block so that a failing assertion or search does not leave
 * the test gene behind in the database.
 */
@Test
public void testSearchGenes() {
    Gene gene = Gene.Factory.newInstance();
    // Random 5-digit NCBI id to reduce the chance of clashing with existing genes.
    Integer id = Integer.parseInt(RandomStringUtils.randomNumeric(5));
    gene.setNcbiGeneId(id);
    gene.setName("test_search");
    gene.setOfficialName("test_search");
    gene.setOfficialSymbol("test_search");

    Taxon human = taxonService.findByCommonName("human");
    gene.setTaxon(human);

    PhysicalLocation pl1 = PhysicalLocation.Factory.newInstance();
    Chromosome chromosome = new Chromosome("X", null, this.getTestPersistentBioSequence(), human);
    chromosome = (Chromosome) persisterHelper.persist(chromosome);
    pl1.setChromosome(chromosome);
    pl1.setNucleotide(10000010L);
    pl1.setNucleotideLength(1001);
    pl1.setStrand("-");
    gene.setPhysicalLocation(pl1);

    // Created outside the try block: if creation itself fails there is nothing to clean up.
    gene = geneDao.create(gene);
    try {
        // NOTE(review): 1L is presumably the id of the human taxon - confirm against the test data.
        Collection<GeneValueObject> searchResults = geneCoreService.searchGenes("test_search", 1L);
        assertNotNull(searchResults);
        GeneValueObject gvo = searchResults.iterator().next();
        assertNotNull(gvo);
    } finally {
        geneDao.remove(gene);
    }
}
use of ubic.gemma.model.genome.PhysicalLocation in project Gemma by PavlidisLab.
the class NcbiGeneConverter method convert.
/**
 * Builds the gene products described by a single NCBI gene2accession record.
 * <p>
 * An RNA product is created when the record has an RNA nucleotide accession. A protein product is
 * created when the record has a protein accession and {@code NcbiGeneConverter.retainProteinInformation}
 * is set. Each product gets one GenBank {@link DatabaseEntry} carrying the accession and its version.
 *
 * @param acc  the gene2accession record to convert
 * @param gene the gene the created products are attached to
 * @return the created products; empty if neither an RNA nor a protein product applies
 */
public Collection<GeneProduct> convert(NCBIGene2Accession acc, Gene gene) {
    Collection<GeneProduct> geneProducts = new HashSet<>();

    // RNA section
    if (acc.getRnaNucleotideAccession() != null) {
        GeneProduct rna = GeneProduct.Factory.newInstance();
        // set available fields
        rna.setNcbiGi(acc.getRnaNucleotideGI());
        rna.setGene(gene);
        rna.setName(acc.getRnaNucleotideAccession());
        rna.setType(GeneProductType.RNA);

        String description = "Imported from NCBI Gene";
        if (acc.getStatus() != null) {
            description = description + " (Refseq status: " + acc.getStatus() + ").";
        }

        // The RNA accession is already known to be non-null here, so it is recorded unconditionally.
        DatabaseEntry accession = DatabaseEntry.Factory.newInstance();
        accession.setAccession(acc.getRnaNucleotideAccession());
        accession.setAccessionVersion(acc.getRnaNucleotideAccessionVersion());
        accession.setExternalDatabase(NcbiGeneConverter.genBank);
        if (rna.getAccessions() == null) {
            rna.setAccessions(new HashSet<DatabaseEntry>());
        }
        rna.getAccessions().add(accession);

        /*
         * Fill in physical location details. Only done when the record names a genomic accession and
         * the gene already has a physical location.
         */
        if (acc.getGenomicNucleotideAccession() != null && gene.getPhysicalLocation() != null) {
            // Any return value is ignored; presumably this updates chromosome details on the gene - TODO confirm.
            this.getChromosomeDetails(acc, gene);
            PhysicalLocation pl = this.getPhysicalLocation(acc, gene);
            rna.setPhysicalLocation(pl);
        }

        rna.setDescription(description);
        geneProducts.add(rna);
    }

    // Protein section
    if (NcbiGeneConverter.retainProteinInformation && acc.getProteinAccession() != null) {
        GeneProduct protein = GeneProduct.Factory.newInstance();
        // set available fields
        protein.setNcbiGi(acc.getProteinGI());
        protein.setGene(gene);
        protein.setName(acc.getProteinAccession());
        protein.setType(GeneProductType.PROTEIN);
        protein.setDescription("Imported from NCBI Gene" + (acc.getStatus() != null ? " (" + acc.getStatus() + ")" : ""));

        DatabaseEntry accession = DatabaseEntry.Factory.newInstance();
        accession.setAccession(acc.getProteinAccession());
        accession.setAccessionVersion(acc.getProteinAccessionVersion());
        accession.setExternalDatabase(NcbiGeneConverter.genBank);

        Collection<DatabaseEntry> accessions = new HashSet<>();
        accessions.add(accession);
        protein.setAccessions(accessions);
        geneProducts.add(protein);
    }

    return geneProducts;
}
use of ubic.gemma.model.genome.PhysicalLocation in project Gemma by PavlidisLab.
the class NcbiGeneConverter method convert.
/**
 * Converts an NCBI gene_info record into a new, unpersisted {@link Gene}.
 * <p>
 * Copies the identifiers, symbol and names, records previous or discontinued NCBI ids, attaches a
 * placeholder taxon and chromosome location, adds one alias per synonym and one accession for each
 * database cross-reference whose database name is "Ensembl" (case-insensitive).
 *
 * @param info the parsed gene_info record
 * @return the populated gene
 */
public Gene convert(NCBIGeneInfo info) {
    Gene gene = Gene.Factory.newInstance();

    gene.setNcbiGeneId(Integer.parseInt(info.getGeneId()));
    gene.setName(info.getDefaultSymbol());
    gene.setOfficialSymbol(info.getDefaultSymbol());
    gene.setOfficialName(info.getDescription());
    gene.setEnsemblId(info.getEnsemblId());

    /*
     * NOTE multiple discontinued or previous ids are allowed; they are stored as one comma-separated
     * string.
     */
    if (info.getHistory() == null) {
        // No history available: fall back on the discontinued id, if there is one.
        if (StringUtils.isNotBlank(info.getDiscontinuedId())) {
            if (NcbiGeneConverter.log.isDebugEnabled()) {
                NcbiGeneConverter.log.debug("Gene matches a gene that was discontinued: " + gene + " matches gene that had id " + info.getDiscontinuedId());
            }
            gene.setPreviousNcbiId(info.getDiscontinuedId());
        }
    } else {
        assert info.getHistory().getCurrentId() == null || info.getGeneId().equals(info.getHistory().getCurrentId());
        assert info.getHistory().getPreviousIds() != null;
        if (!info.getHistory().getPreviousIds().isEmpty()) {
            gene.setPreviousNcbiId(StringUtils.join(info.getHistory().getPreviousIds(), ","));
        }
    }

    gene.setDescription("Imported from NCBI gene; Nomenclature status: " + info.getNomenclatureStatus());

    // Placeholder taxon identified only by its NCBI id.
    Taxon taxon = Taxon.Factory.newInstance();
    taxon.setNcbiId(info.getTaxId());
    taxon.setIsGenesUsable(false);
    taxon.setIsSpecies(true);
    gene.setTaxon(taxon);

    /*
     * We are going to stop maintaining this information
     */
    PhysicalLocation location = PhysicalLocation.Factory.newInstance();
    location.setChromosome(new Chromosome(info.getChromosome(), taxon));
    gene.setPhysicalLocation(location);

    Collection<GeneAlias> geneAliases = gene.getAliases();
    for (String synonym : info.getSynonyms()) {
        GeneAlias geneAlias = GeneAlias.Factory.newInstance();
        geneAlias.setAlias(synonym);
        geneAliases.add(geneAlias);
    }

    // Only Ensembl cross-references are kept.
    for (String dbName : info.getDbXrefs().keySet()) {
        if (dbName.equalsIgnoreCase("Ensembl")) {
            DatabaseEntry ensemblXref = DatabaseEntry.Factory.newInstance();
            ensemblXref.setAccession(info.getDbXrefs().get(dbName));
            ensemblXref.setExternalDatabase(NcbiGeneConverter.getEnsembl());
            gene.getAccessions().add(ensemblXref);
        }
    }

    return gene;
}
use of ubic.gemma.model.genome.PhysicalLocation in project Gemma by PavlidisLab.
the class CompositeSequenceDaoImpl method batchGetGenesWithSpecificity.
/**
 * Loads the sequence-to-gene-product associations for one batch of composite sequences and adds them to
 * {@code results}, keyed by composite sequence. Every composite sequence in the batch ends up with an
 * entry, which is an empty collection if no association was found for it.
 *
 * @param batch   of composite sequences to process; nothing is done if it is empty
 * @param results - adding to this
 */
private void batchGetGenesWithSpecificity(Collection<CompositeSequence> batch, Map<CompositeSequence, Collection<BioSequence2GeneProduct>> results) {
    if (batch.isEmpty()) {
        return;
    }

    // language=HQL
    final String queryString = "select cs,bas from CompositeSequence cs, BioSequence2GeneProduct bas inner join cs.biologicalCharacteristic bs "
            + "inner join fetch bas.geneProduct gp inner join fetch gp.gene gene "
            + "where bas.bioSequence=bs and cs in (:cs)";
    List<?> qr = this.getHibernateTemplate().findByNamedParam(queryString, "cs", batch);

    for (Object o : qr) {
        // Each row is a (CompositeSequence, BioSequence2GeneProduct) pair, matching the select clause.
        Object[] oa = (Object[]) o;
        CompositeSequence csa = (CompositeSequence) oa[0];
        BioSequence2GeneProduct ba = (BioSequence2GeneProduct) oa[1];

        if (ba instanceof BlatAssociation) {
            BlatResult blatResult = ((BlatAssociation) ba).getBlatResult();
            /*
             * We didn't always used to fill in the targetAlignedRegion ... this is just in case.
             */
            if (blatResult.getTargetAlignedRegion() == null) {
                PhysicalLocation pl = PhysicalLocation.Factory.newInstance();
                pl.setChromosome(blatResult.getTargetChromosome());
                pl.setNucleotide(blatResult.getTargetStart());
                pl.setNucleotideLength(blatResult.getTargetEnd().intValue() - blatResult.getTargetStart().intValue());
                pl.setStrand(blatResult.getStrand());
                // Note: not bothering to fill in the bin.

                // Attach the rebuilt region to the result; it was previously computed and then discarded.
                blatResult.setTargetAlignedRegion(pl);
            }
        }

        Collection<BioSequence2GeneProduct> associations = results.get(csa);
        if (associations == null) {
            associations = new HashSet<>();
            results.put(csa, associations);
        }
        associations.add(ba);
    }

    /*
     * This is kind of important. We ensure we return an empty collection for probes that do not have a mapping.
     */
    for (CompositeSequence cs : batch) {
        if (!results.containsKey(cs)) {
            results.put(cs, new HashSet<BioSequence2GeneProduct>());
        }
    }
}
use of ubic.gemma.model.genome.PhysicalLocation in project Gemma by PavlidisLab.
the class BlatAssociationScorer method clusterGenes.
/**
 * Groups genes by the target aligned region of the BLAT results they are associated with.
 * <p>
 * Locations are used as {@link HashMap} keys, so two alignments fall into the same cluster exactly when
 * their {@link PhysicalLocation}s are equal according to {@code equals}/{@code hashCode}. A BLAT result
 * with no target aligned region contributes its gene to the cluster under the {@code null} key.
 *
 * @param associations assocs, keyed by gene.
 * @return map of physical locations for the alignments, and which genes are found there.
 */
private static Map<PhysicalLocation, Collection<Gene>> clusterGenes(Map<Gene, Collection<BlatAssociation>> associations) {
    Map<PhysicalLocation, Collection<Gene>> clusters = new HashMap<>();

    for (Map.Entry<Gene, Collection<BlatAssociation>> geneEntry : associations.entrySet()) {
        Gene gene = geneEntry.getKey();
        for (BlatAssociation ba : geneEntry.getValue()) {
            PhysicalLocation pl = ba.getBlatResult().getTargetAlignedRegion();
            Collection<Gene> genesAtLocation = clusters.get(pl);
            if (genesAtLocation == null) {
                genesAtLocation = new HashSet<>();
                clusters.put(pl, genesAtLocation);
            }
            genesAtLocation.add(gene);
        }
    }

    // debugging information about clusters.
    if (BlatAssociationScorer.log.isDebugEnabled()) {
        for (Map.Entry<PhysicalLocation, Collection<Gene>> cluster : clusters.entrySet()) {
            Collection<Gene> members = cluster.getValue();
            if (members.size() > 1) {
                BlatAssociationScorer.log.debug("Cluster at " + cluster.getKey() + " with " + members.size() + " members:\n" + StringUtils.join(members.iterator(), "\n"));
            }
        }
    }

    return clusters;
}
Aggregations