use of ubic.gemma.model.common.description.DatabaseEntry in project Gemma by PavlidisLab.
the class NcbiGeneConverter method convert.
/**
 * Builds the gene products described by a single gene2accession record: an RNA product when the record
 * carries an RNA nucleotide accession, and a protein product when protein information is being retained
 * and the record carries a protein accession.
 *
 * @param acc  the gene2accession record supplying accessions, GIs and the RefSeq status
 * @param gene the gene that each created product is attached to
 * @return the products that were created; empty if the record yields neither an RNA nor a protein
 */
public Collection<GeneProduct> convert(NCBIGene2Accession acc, Gene gene) {
    Collection<GeneProduct> products = new HashSet<>();

    // ---- RNA product ----
    String rnaAccession = acc.getRnaNucleotideAccession();
    if (rnaAccession != null) {
        GeneProduct rnaProduct = GeneProduct.Factory.newInstance();
        rnaProduct.setNcbiGi(acc.getRnaNucleotideGI());
        rnaProduct.setGene(gene);
        rnaProduct.setName(rnaAccession);
        rnaProduct.setType(GeneProductType.RNA);

        // The RNA accession is known to be present here, so the GenBank entry is always attached.
        DatabaseEntry rnaEntry = DatabaseEntry.Factory.newInstance();
        rnaEntry.setAccession(rnaAccession);
        rnaEntry.setAccessionVersion(acc.getRnaNucleotideAccessionVersion());
        rnaEntry.setExternalDatabase(NcbiGeneConverter.genBank);
        if (rnaProduct.getAccessions() == null) {
            rnaProduct.setAccessions(new HashSet<DatabaseEntry>());
        }
        rnaProduct.getAccessions().add(rnaEntry);

        // Physical location is only filled in when the record has a genomic accession and the gene
        // already carries a location.
        boolean hasGenomicAccession = acc.getGenomicNucleotideAccession() != null;
        if (hasGenomicAccession && gene.getPhysicalLocation() != null) {
            this.getChromosomeDetails(acc, gene);
            rnaProduct.setPhysicalLocation(this.getPhysicalLocation(acc, gene));
        }

        rnaProduct.setDescription("Imported from NCBI Gene"
                + (acc.getStatus() != null ? " (Refseq status: " + acc.getStatus() + ")." : ""));
        products.add(rnaProduct);
    }

    // ---- Protein product ----
    String proteinAccession = acc.getProteinAccession();
    if (NcbiGeneConverter.retainProteinInformation && proteinAccession != null) {
        GeneProduct proteinProduct = GeneProduct.Factory.newInstance();
        proteinProduct.setNcbiGi(acc.getProteinGI());
        proteinProduct.setGene(gene);
        proteinProduct.setName(proteinAccession);
        proteinProduct.setType(GeneProductType.PROTEIN);

        String proteinDescription = "Imported from NCBI Gene";
        if (acc.getStatus() != null) {
            proteinDescription = proteinDescription + " (" + acc.getStatus() + ")";
        }
        proteinProduct.setDescription(proteinDescription);

        DatabaseEntry proteinEntry = DatabaseEntry.Factory.newInstance();
        proteinEntry.setAccession(proteinAccession);
        proteinEntry.setAccessionVersion(acc.getProteinAccessionVersion());
        proteinEntry.setExternalDatabase(NcbiGeneConverter.genBank);

        Collection<DatabaseEntry> proteinAccessions = new HashSet<>();
        proteinAccessions.add(proteinEntry);
        proteinProduct.setAccessions(proteinAccessions);

        products.add(proteinProduct);
    }

    return products;
}
use of ubic.gemma.model.common.description.DatabaseEntry in project Gemma by PavlidisLab.
the class NcbiGeneConverter method convert.
/**
 * Creates a new {@link Gene} populated from an NCBI gene_info record: identifiers and names, previous or
 * discontinued NCBI ids, a freshly created taxon and chromosome-level physical location, aliases from the
 * record's synonyms, and any Ensembl cross-references.
 *
 * @param info the parsed gene_info record
 * @return the newly created gene
 */
public Gene convert(NCBIGeneInfo info) {
    Gene result = Gene.Factory.newInstance();

    result.setNcbiGeneId(Integer.parseInt(info.getGeneId()));
    result.setName(info.getDefaultSymbol());
    result.setOfficialSymbol(info.getDefaultSymbol());
    result.setOfficialName(info.getDescription());
    result.setEnsemblId(info.getEnsemblId());

    /*
     * NOTE we allow multiple discontinued or previous ids, separated by commas. This is a hack to account for
     * cases uncovered recently...can be minimized by running this regularly.
     */
    if (info.getHistory() != null) {
        assert info.getHistory().getCurrentId() == null || info.getGeneId().equals(info.getHistory().getCurrentId());
        assert info.getHistory().getPreviousIds() != null;

        if (!info.getHistory().getPreviousIds().isEmpty()) {
            result.setPreviousNcbiId(StringUtils.join(info.getHistory().getPreviousIds(), ","));
        }
    } else if (StringUtils.isNotBlank(info.getDiscontinuedId())) {
        if (NcbiGeneConverter.log.isDebugEnabled()) {
            NcbiGeneConverter.log.debug("Gene matches a gene that was discontinued: " + result + " matches gene that had id " + info.getDiscontinuedId());
        }
        result.setPreviousNcbiId(info.getDiscontinuedId());
    }

    result.setDescription("Imported from NCBI gene; Nomenclature status: " + info.getNomenclatureStatus());

    // A new taxon is created from the record's tax id; it is flagged as a species whose genes are not usable.
    Taxon taxon = Taxon.Factory.newInstance();
    taxon.setNcbiId(info.getTaxId());
    taxon.setIsGenesUsable(false);
    taxon.setIsSpecies(true);
    result.setTaxon(taxon);

    /*
     * We are going to stop maintaining this information
     */
    PhysicalLocation location = PhysicalLocation.Factory.newInstance();
    location.setChromosome(new Chromosome(info.getChromosome(), taxon));
    result.setPhysicalLocation(location);

    // One alias per synonym, added to the gene's existing alias collection.
    Collection<GeneAlias> geneAliases = result.getAliases();
    for (String synonym : info.getSynonyms()) {
        GeneAlias geneAlias = GeneAlias.Factory.newInstance();
        geneAlias.setAlias(synonym);
        geneAliases.add(geneAlias);
    }

    // Only cross-references whose database name is "Ensembl" (case-insensitive) are kept.
    for (String databaseName : info.getDbXrefs().keySet()) {
        if (databaseName.equalsIgnoreCase("Ensembl")) {
            DatabaseEntry ensemblRef = DatabaseEntry.Factory.newInstance();
            ensemblRef.setAccession(info.getDbXrefs().get(databaseName));
            ensemblRef.setExternalDatabase(NcbiGeneConverter.getEnsembl());
            result.getAccessions().add(ensemblRef);
        }
    }

    return result;
}
use of ubic.gemma.model.common.description.DatabaseEntry in project Gemma by PavlidisLab.
the class CompositeSequenceDaoImpl method thaw.
/**
 * Initializes lazily-loaded associations of the given composite sequences inside a native Hibernate session.
 * <p>
 * For each composite sequence this re-associates it with the session (lock mode NONE), initializes its array
 * design and, when present, its biological characteristic together with that sequence's taxon, sequence
 * database entry and external database. For every linked gene product that has a gene, the gene's aliases are
 * touched so they get loaded. Processed entities are evicted as the loop proceeds and the session is cleared
 * at the end.
 *
 * @param compositeSequences the composite sequences to initialize
 */
@Override
public void thaw(final Collection<CompositeSequence> compositeSequences) {
    HibernateTemplate templ = this.getHibernateTemplate();
    templ.executeWithNativeSession(new org.springframework.orm.hibernate3.HibernateCallback<Object>() {
        @Override
        public Object doInHibernate(org.hibernate.Session session) throws org.hibernate.HibernateException {
            int i = 0;
            int numToDo = compositeSequences.size();
            for (CompositeSequence cs : compositeSequences) {
                session.buildLockRequest(LockOptions.NONE).lock(cs);
                Hibernate.initialize(cs.getArrayDesign());

                BioSequence bs = cs.getBiologicalCharacteristic();
                if (bs == null) {
                    // Nothing further to initialize; note this element is not counted in the progress log.
                    continue;
                }

                session.buildLockRequest(LockOptions.NONE).lock(bs);
                Hibernate.initialize(bs);
                Hibernate.initialize(bs.getTaxon());

                DatabaseEntry dbEntry = bs.getSequenceDatabaseEntry();
                if (dbEntry != null) {
                    Hibernate.initialize(dbEntry);
                    Hibernate.initialize(dbEntry.getExternalDatabase());
                    session.evict(dbEntry);
                    session.evict(dbEntry.getExternalDatabase());
                }

                if (bs.getBioSequence2GeneProduct() == null) {
                    // Also skips the progress counter and the eviction of bs below.
                    continue;
                }

                for (BioSequence2GeneProduct bs2gp : bs.getBioSequence2GeneProduct()) {
                    if (bs2gp == null) {
                        continue;
                    }
                    GeneProduct geneProduct = bs2gp.getGeneProduct();
                    if (geneProduct != null && geneProduct.getGene() != null) {
                        Gene g = geneProduct.getGene();
                        // Calling size() forces the alias collection to load.
                        g.getAliases().size();
                        session.evict(g);
                        session.evict(geneProduct);
                    }
                }

                if (++i % 2000 == 0) {
                    AbstractDao.log.info("Progress: " + i + "/" + numToDo + "...");
                    try {
                        Thread.sleep(10);
                    } catch (InterruptedException e) {
                        // Do not lose the interruption: restore the flag so code further up the stack
                        // can observe it. Processing of the remaining elements continues as before.
                        Thread.currentThread().interrupt();
                    }
                }

                session.evict(bs);
            }
            session.clear();
            return null;
        }
    });
}
use of ubic.gemma.model.common.description.DatabaseEntry in project Gemma by PavlidisLab.
the class GeneProductServiceImpl method remove.
/**
 * Removes the given gene products, first removing any BLAT associations and annotation associations found
 * for them. Before each product is removed, accessions that are also found on a bio-sequence are detached
 * from the product's accession collection.
 *
 * @param toRemove the gene products to remove
 */
@Override
@Transactional
public void remove(Collection<GeneProduct> toRemove) {
    Collection<BlatAssociation> blatHits = this.blatAssociationDao.find(toRemove);
    if (!blatHits.isEmpty()) {
        AbstractService.log.info("Removing " + blatHits.size() + " blat associations involving up to " + toRemove.size() + " products.");
        this.blatAssociationDao.remove(blatHits);
    }

    Collection<AnnotationAssociation> annotationHits = this.annotationAssociationDao.find(toRemove);
    if (!annotationHits.isEmpty()) {
        AbstractService.log.info("Removing " + annotationHits.size() + " annotationAssociations involving up to " + toRemove.size() + " products.");
        this.annotationAssociationDao.remove(annotationHits);
    }

    // Detach database entries that a bio-sequence still refers to, then remove the product itself.
    for (GeneProduct candidate : toRemove) {
        GeneProduct thawed = this.thaw(candidate);
        Collection<DatabaseEntry> productAccessions = thawed.getAccessions();

        Collection<DatabaseEntry> stillReferenced = new HashSet<>();
        for (DatabaseEntry entry : productAccessions) {
            if (this.bioSequenceDao.findByAccession(entry) == null) {
                continue;
            }
            stillReferenced.add(entry);
        }

        productAccessions.removeAll(stillReferenced);
        this.geneProductDao.remove(thawed);
    }
}
use of ubic.gemma.model.common.description.DatabaseEntry in project Gemma by PavlidisLab.
the class CompositeSequenceParser method parseOneLine.
/**
 * Parses one tab-delimited line of the form {@code probeId<TAB>genbankAccession<TAB>description} into a
 * composite sequence whose biological characteristic is a new bio-sequence named after the accession.
 *
 * @param line the line to parse
 * @return the composite sequence, or {@code null} if the line does not have exactly three tab-separated fields
 */
@Override
public CompositeSequence parseOneLine(String line) {
    String[] fields = StringUtils.splitPreserveAllTokens(line, '\t');
    if (fields.length != 3) {
        return null;
    }

    String probeName = fields[0];
    String accession = fields[1];
    String probeDescription = fields[2];

    CompositeSequence probe = CompositeSequence.Factory.newInstance();
    probe.setName(probeName);
    probe.setDescription(probeDescription);

    DatabaseEntry genbankEntry = ExternalDatabaseUtils.getGenbankAccession(accession);

    // Name and description of the sequence are placeholders that are typically changed later.
    BioSequence sequence = BioSequence.Factory.newInstance();
    sequence.setName(accession);
    sequence.setDescription(probeDescription + " (From platform source)");
    sequence.setSequenceDatabaseEntry(genbankEntry);

    probe.setBiologicalCharacteristic(sequence);
    return probe;
}
Aggregations