use of ubic.gemma.model.common.description.DatabaseEntry in project Gemma by PavlidisLab.
the class BatchInfoPopulationServiceImpl method fetchRawDataFiles.
/**
 * Retrieves the raw data files for an experiment, looked up by the experiment's external
 * accession. Currently only supports GEO.
 *
 * @param ee the experiment whose raw data files are wanted
 * @return whatever the fetcher returns for the experiment's accession, or an empty set (after
 *         logging a warning) when the experiment has no accession
 */
private Collection<LocalFile> fetchRawDataFiles(ExpressionExperiment ee) {
    RawDataFetcher rawDataFetcher = new RawDataFetcher();
    DatabaseEntry entry = ee.getAccession();
    if (entry != null) {
        return rawDataFetcher.fetch(entry.getAccession());
    }
    // Nothing to look up without an accession; report it and hand back an empty result.
    BatchInfoPopulationServiceImpl.log.warn("No accession for " + ee.getShortName());
    return new HashSet<>();
}
use of ubic.gemma.model.common.description.DatabaseEntry in project Gemma by PavlidisLab.
the class BioSequencePersistTest method onSetUpInTransaction.
/**
 * Builds the BioSequence fixture used by the tests: a sequence for a "mouse" taxon with a
 * random name and a "Genbank" database entry carrying a random accession.
 */
@Before
public void onSetUpInTransaction() {
    bs = BioSequence.Factory.newInstance();

    // Taxon the sequence belongs to.
    Taxon mouse = Taxon.Factory.newInstance();
    mouse.setCommonName("mouse");
    mouse.setIsSpecies(true);
    mouse.setIsGenesUsable(true);
    bs.setTaxon(mouse);

    // External database entry with a random accession.
    ExternalDatabase genbank = ExternalDatabase.Factory.newInstance();
    genbank.setName("Genbank");
    DatabaseEntry sequenceEntry = DatabaseEntry.Factory.newInstance();
    sequenceEntry.setExternalDatabase(genbank);
    String randomAccession = RandomStringUtils.randomAlphanumeric(10);
    sequenceEntry.setAccession(randomAccession);

    String randomName = RandomStringUtils.randomAlphanumeric(10);
    bs.setName(randomName);
    bs.setSequenceDatabaseEntry(sequenceEntry);
}
use of ubic.gemma.model.common.description.DatabaseEntry in project Gemma by PavlidisLab.
the class NCBIGeneLoadingTest method testGeneLoader.
/**
 * Loads the sample human NCBI gene files, checks the number of genes loaded and the details of
 * the A2M gene (accessions, taxon, NCBI gene id), then loads the "changed" sample files to
 * verify that a gene-history update and an Ensembl id are picked up. Finally removes the gene
 * products fetched for A2M.
 *
 * @throws Exception if loading or waiting for the loader fails
 */
@Test
public void testGeneLoader() throws Exception {
    NcbiGeneLoader loader = new NcbiGeneLoader(persisterHelper);
    loader.setTaxonService(taxonService);
    String geneInfoTestFile = "/data/loader/genome/gene/gene_info.human.sample";
    String gene2AccTestFile = "/data/loader/genome/gene/gene2accession.human.sample";
    String geneHistoryFile = "/data/loader/genome/gene/gene_history.human.sample";
    // threaded load
    Taxon ta = taxonService.findByCommonName("human");
    assertNotNull(ta);
    loader.load(FileTools.resourceToPath(geneInfoTestFile), FileTools.resourceToPath(gene2AccTestFile),
            FileTools.resourceToPath(geneHistoryFile), null, ta);
    // wait until the loader is done.
    // NOTE(review): this poll has no timeout, so the test hangs if the loader never finishes.
    while (!loader.isLoaderDone()) {
        Thread.sleep(100);
    }
    // loader is done.
    // check if it loaded elements to the database
    log.debug("Loader done with number of elements: " + loader.getLoadedGeneCount());
    assertEquals(51, loader.getLoadedGeneCount());
    // grab one gene and check its information
    // (depends on information in the gene_info and gene2accession files)
    Collection<Gene> geneCollection = geneService.findByOfficialSymbol("A2M");
    assertEquals(1, geneCollection.size());
    g = geneCollection.iterator().next();
    g = geneService.thaw(g);
    Collection<GeneProduct> products = g.getProducts();
    Collection<String> expectedAccessions = new ArrayList<>();
    Collection<String> hasAccessions = new ArrayList<>();
    expectedAccessions.add("AB209614.2");
    expectedAccessions.add("AK307832.1");
    // Collect every product accession as "<accession>.<version>" for comparison.
    for (GeneProduct product : products) {
        Collection<DatabaseEntry> accessions = product.getAccessions();
        for (DatabaseEntry de : accessions) {
            String accession = de.getAccession();
            String accVersion = de.getAccessionVersion();
            hasAccessions.add(accession + "." + accVersion);
            log.debug(accession + "." + accVersion);
        }
    }
    assertEquals(12, hasAccessions.size());
    assertTrue(hasAccessions.containsAll(expectedAccessions));
    Taxon t = g.getTaxon();
    assertEquals(9606, t.getNcbiId().intValue());
    // Integer.valueOf replaces the deprecated Integer(int) constructor.
    assertEquals(Integer.valueOf(2), g.getNcbiGeneId());
    /*
     * Test history change. One gene has been updated, from 7003 to 44444 (fake), and mimic adding ensembl
     */
    geneInfoTestFile = "/data/loader/genome/gene/gene_info.human.changed.sample";
    gene2AccTestFile = "/data/loader/genome/gene/gene2accession.human.changed.sample";
    String updatedHistory = "/data/loader/genome/gene/gene_history.human.changed.sample";
    String geneEnsemblFile = "/data/loader/genome/gene/gene2ensembl.human.sample";
    loader.load(FileTools.resourceToPath(geneInfoTestFile), FileTools.resourceToPath(gene2AccTestFile),
            FileTools.resourceToPath(updatedHistory), FileTools.resourceToPath(geneEnsemblFile), ta);
    // wait until the loader is done.
    while (!loader.isLoaderDone()) {
        Thread.sleep(100);
    }
    Collection<Gene> updatedTestGene = geneService.findByOfficialSymbol("TEAD1");
    assertEquals(1, updatedTestGene.size());
    g = updatedTestGene.iterator().next();
    assertEquals("7003", g.getPreviousNcbiId());
    assertEquals(Integer.valueOf(44444), g.getNcbiGeneId());
    g = geneService.findByNCBIId(1);
    assertEquals("ENSG00000121410", g.getEnsemblId());
    // test remove...
    geneProductService.remove(products);
}
use of ubic.gemma.model.common.description.DatabaseEntry in project Gemma by PavlidisLab.
the class ArrayDesignSequenceProcessingServiceImpl method createOrUpdateGenbankSequence.
/**
 * Resolves a Genbank-derived sequence to its persistent counterpart, creating one if no match
 * exists, and then brings the persistent record up to date.
 *
 * @param found a new (non-persistent) biosequence that can be used to create a new entry or
 *              update an existing one with the sequence. The sequence would have come from Genbank.
 * @param force if true, any sequence information already held by the matching persistent
 *              BioSequence is overwritten. Otherwise the sequence is only updated when the
 *              persistent record has no sequence and 'found' has one.
 * @return persistent BioSequence.
 */
private BioSequence createOrUpdateGenbankSequence(BioSequence found, boolean force) {
    assert found != null;
    DatabaseEntry genbankEntry = found.getSequenceDatabaseEntry();
    // These should always hold because the sequences come from genbank (blastDb).
    assert genbankEntry != null;
    assert genbankEntry.getExternalDatabase() != null;

    // First try to match on accession.
    BioSequence persistent = bioSequenceService.findByAccession(genbankEntry);
    if (persistent == null) {
        if (ArrayDesignSequenceProcessingServiceImpl.log.isDebugEnabled()) {
            ArrayDesignSequenceProcessingServiceImpl.log.debug("Find (or creating) new sequence " + found);
        }
        // there still might be a match.
        persistent = bioSequenceService.find(found);
        if (persistent == null) {
            persistent = bioSequenceService.create(found);
        }
    }
    assert persistent != null;

    // note that no matter what we make sure the database entry is filled in.
    if (!force && (!StringUtils.isBlank(persistent.getSequence()) || StringUtils.isBlank(found.getSequence()))) {
        // Not forced, and either we already hold a sequence or 'found' has none to offer.
        this.fillInDatabaseEntry(found, persistent);
        return persistent;
    }
    return this.updateExistingWithSequenceData(found, persistent);
}
use of ubic.gemma.model.common.description.DatabaseEntry in project Gemma by PavlidisLab.
the class GeoDomainObjectGenerator method getProjectedAccessions.
/**
 * Determine the set of external accession values that will be generated during parsing. This can be used to
 * pre-empt time-consuming fetch and download of data we already have.
 * <p>
 * For a GSE the accession is used as-is; for a GDS the accession is the comma-separated list of the series
 * (GSE) found for that dataset.
 *
 * @param geoAccession geo accession; must be a GSE, GDS or GPL
 * @return a collection holding a single "GEO" database entry for the projected accession, or {@code null} for
 *         a GPL (not implemented; a warning is logged)
 * @throws InvalidAccessionException if the accession is null or blank, is not a GSE, GDS or GPL, or is a GDS
 *                                   for which no series can be found
 */
public Collection<DatabaseEntry> getProjectedAccessions(String geoAccession) {
    // Validate up front: a null accession must be rejected here rather than fail with a
    // NullPointerException in startsWith below.
    if (StringUtils.isBlank(geoAccession)) {
        throw new InvalidAccessionException("GEO accession must not be blank. Enter a GSE, GDS or GPL");
    }

    String seriesAccession;
    if (geoAccession.startsWith("GSE")) {
        seriesAccession = geoAccession;
    } else if (geoAccession.startsWith("GPL")) {
        GeoDomainObjectGenerator.log.warn("Determining if the data already exist for a GPL (" + geoAccession + ") is not implemented.");
        // Kept as null (not an empty collection) so existing callers see the same result.
        return null;
    } else if (geoAccession.startsWith("GDS")) {
        Collection<String> seriesAccessions = DatasetCombiner.findGSEforGDS(geoAccession);
        if (seriesAccessions == null || seriesAccessions.isEmpty()) {
            throw new InvalidAccessionException("There is no series (GSE) for the accession " + geoAccession);
        }
        seriesAccession = StringUtils.join(seriesAccessions, ",");
    } else {
        throw new InvalidAccessionException("'" + geoAccession + "' is not understood by Gemma; must be a GSE, GDS or GPL. Did you choose the right source database?");
    }

    ExternalDatabase ed = ExternalDatabase.Factory.newInstance();
    ed.setName("GEO");
    DatabaseEntry de = DatabaseEntry.Factory.newInstance(ed);
    de.setAccession(seriesAccession);

    Collection<DatabaseEntry> accessions = new HashSet<>();
    accessions.add(de);
    return accessions;
}
Aggregations