Search in sources :

Example 21 with DatabaseEntry

use of ubic.gemma.model.common.description.DatabaseEntry in project Gemma by PavlidisLab.

the class BatchInfoPopulationServiceImpl method fetchRawDataFiles.

/**
 * Fetches the raw data files for an experiment using its accession. Currently only supports GEO.
 *
 * @param ee the experiment whose raw data files are wanted
 * @return whatever the fetcher returns for the experiment's accession, or an empty set (after logging a
 *         warning) when the experiment has no accession
 */
private Collection<LocalFile> fetchRawDataFiles(ExpressionExperiment ee) {
    RawDataFetcher rawDataFetcher = new RawDataFetcher();
    DatabaseEntry entry = ee.getAccession();
    if (entry != null) {
        return rawDataFetcher.fetch(entry.getAccession());
    }
    // Nothing to look up without an accession.
    BatchInfoPopulationServiceImpl.log.warn("No accession for " + ee.getShortName());
    return new HashSet<>();
}
Also used : DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) RawDataFetcher(ubic.gemma.core.loader.expression.geo.fetcher.RawDataFetcher)

Example 22 with DatabaseEntry

use of ubic.gemma.model.common.description.DatabaseEntry in project Gemma by PavlidisLab.

the class BioSequencePersistTest method onSetUpInTransaction.

/**
 * Builds the (not yet persisted) BioSequence fixture: a sequence with a random name, a "mouse" taxon and a
 * "Genbank" database entry carrying a random accession.
 */
@Before
public void onSetUpInTransaction() {
    bs = BioSequence.Factory.newInstance();

    // Taxon the sequence belongs to.
    Taxon mouse = Taxon.Factory.newInstance();
    mouse.setCommonName("mouse");
    mouse.setIsSpecies(true);
    mouse.setIsGenesUsable(true);
    bs.setTaxon(mouse);

    // External database the sequence's entry points at.
    ExternalDatabase genbank = ExternalDatabase.Factory.newInstance();
    genbank.setName("Genbank");

    // Database entry with a random accession.
    DatabaseEntry entry = DatabaseEntry.Factory.newInstance();
    entry.setExternalDatabase(genbank);
    entry.setAccession(RandomStringUtils.randomAlphanumeric(10));

    bs.setName(RandomStringUtils.randomAlphanumeric(10));
    bs.setSequenceDatabaseEntry(entry);
}
Also used : ExternalDatabase(ubic.gemma.model.common.description.ExternalDatabase) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) Before(org.junit.Before)

Example 23 with DatabaseEntry

use of ubic.gemma.model.common.description.DatabaseEntry in project Gemma by PavlidisLab.

the class NCBIGeneLoadingTest method testGeneLoader.

/**
 * Loads the human sample gene files, checks the loaded gene count and the details of gene A2M (accessions,
 * taxon, NCBI gene id), then loads the "changed" sample files and checks that the gene history update and the
 * Ensembl id were applied. Finishes by removing the gene products collected from A2M.
 *
 * @throws Exception if loading fails or the wait for the loader is interrupted
 */
@Test
public void testGeneLoader() throws Exception {
    NcbiGeneLoader loader = new NcbiGeneLoader(persisterHelper);
    loader.setTaxonService(taxonService);
    String geneInfoTestFile = "/data/loader/genome/gene/gene_info.human.sample";
    String gene2AccTestFile = "/data/loader/genome/gene/gene2accession.human.sample";
    String geneHistoryFile = "/data/loader/genome/gene/gene_history.human.sample";
    // threaded load
    Taxon ta = taxonService.findByCommonName("human");
    assertNotNull(ta);
    loader.load(FileTools.resourceToPath(geneInfoTestFile), FileTools.resourceToPath(gene2AccTestFile), FileTools.resourceToPath(geneHistoryFile), null, ta);
    // wait until the loader is done (polls every 100 ms).
    while (!loader.isLoaderDone()) {
        Thread.sleep(100);
    }
    // loader is done.
    // check if it loaded elements to the database
    log.debug("Loader done with number of elements: " + loader.getLoadedGeneCount());
    assertEquals(51, loader.getLoadedGeneCount());
    // grab one gene and check its information
    // (depends on information in the gene_info and gene2accession files)
    Collection<Gene> geneCollection = geneService.findByOfficialSymbol("A2M");
    assertEquals(1, geneCollection.size());
    g = geneCollection.iterator().next();
    g = geneService.thaw(g);
    Collection<GeneProduct> products = g.getProducts();
    Collection<String> expectedAccessions = new ArrayList<>();
    Collection<String> hasAccessions = new ArrayList<>();
    expectedAccessions.add("AB209614.2");
    expectedAccessions.add("AK307832.1");
    // Collect every product accession as "<accession>.<version>" for comparison with the expected values.
    for (GeneProduct product : products) {
        Collection<DatabaseEntry> accessions = product.getAccessions();
        for (DatabaseEntry de : accessions) {
            String accession = de.getAccession();
            String accVersion = de.getAccessionVersion();
            hasAccessions.add(accession + "." + accVersion);
            log.debug(accession + "." + accVersion);
        }
    }
    assertEquals(12, hasAccessions.size());
    assertTrue(hasAccessions.containsAll(expectedAccessions));
    Taxon t = g.getTaxon();
    assertEquals(9606, t.getNcbiId().intValue());
    // Integer.valueOf replaces the deprecated Integer(int) constructor.
    assertEquals(Integer.valueOf(2), g.getNcbiGeneId());
    /*
     * Test history change. One gene has been updated, from 7003 to 44444 (fake), and mimic adding ensembl
     */
    geneInfoTestFile = "/data/loader/genome/gene/gene_info.human.changed.sample";
    gene2AccTestFile = "/data/loader/genome/gene/gene2accession.human.changed.sample";
    String updatedHistory = "/data/loader/genome/gene/gene_history.human.changed.sample";
    String geneEnsemblFile = "/data/loader/genome/gene/gene2ensembl.human.sample";
    loader.load(FileTools.resourceToPath(geneInfoTestFile), FileTools.resourceToPath(gene2AccTestFile), FileTools.resourceToPath(updatedHistory), FileTools.resourceToPath(geneEnsemblFile), ta);
    // wait until the loader is done.
    while (!loader.isLoaderDone()) {
        Thread.sleep(100);
    }
    Collection<Gene> updatedTestGene = geneService.findByOfficialSymbol("TEAD1");
    assertEquals(1, updatedTestGene.size());
    g = updatedTestGene.iterator().next();
    assertEquals("7003", g.getPreviousNcbiId());
    assertEquals(Integer.valueOf(44444), g.getNcbiGeneId());
    g = geneService.findByNCBIId(1);
    assertEquals("ENSG00000121410", g.getEnsemblId());
    // test remove...
    geneProductService.remove(products);
}
Also used : GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) Gene(ubic.gemma.model.genome.Gene) Taxon(ubic.gemma.model.genome.Taxon) ArrayList(java.util.ArrayList) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) Test(org.junit.Test) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest)

Example 24 with DatabaseEntry

use of ubic.gemma.model.common.description.DatabaseEntry in project Gemma by PavlidisLab.

the class ArrayDesignSequenceProcessingServiceImpl method createOrUpdateGenbankSequence.

/**
 * Looks up the persistent BioSequence matching a sequence obtained from Genbank (first by its database entry,
 * then by a general find), creating it if neither lookup succeeds, and then either updates it with the new
 * sequence data or only fills in its database entry.
 *
 * @param found a new (non-persistent) biosequence that can be used to create a new entry or update an existing one
 *              with the sequence. The sequence would have come from Genbank.
 * @param force If true, if an existing BioSequence that matches is found in the system, any existing sequence
 *              information in the BioSequence will be overwritten. Otherwise, the sequence will only be updated if the
 *              actual sequence information was missing in our DB and 'found' has a sequence.
 * @return persistent BioSequence.
 */
private BioSequence createOrUpdateGenbankSequence(BioSequence found, boolean force) {
    assert found != null;
    DatabaseEntry genbankEntry = found.getSequenceDatabaseEntry();
    // Both of these should always hold because the sequence comes from genbank (blastDb).
    assert genbankEntry != null;
    assert genbankEntry.getExternalDatabase() != null;

    BioSequence result = bioSequenceService.findByAccession(genbankEntry);
    if (result == null) {
        if (ArrayDesignSequenceProcessingServiceImpl.log.isDebugEnabled()) {
            ArrayDesignSequenceProcessingServiceImpl.log.debug("Find (or creating) new sequence " + found);
        }
        // No hit by accession, but there still might be a match.
        result = bioSequenceService.find(found);
        if (result == null) {
            result = bioSequenceService.create(found);
        }
    }
    assert result != null;

    // Overwrite when forced, or when we hold no sequence but 'found' supplies one.
    boolean overwrite = force || (StringUtils.isBlank(result.getSequence()) && !StringUtils.isBlank(found.getSequence()));
    if (!overwrite) {
        // Not touching the sequence itself; still make sure the database entry is filled in.
        this.fillInDatabaseEntry(found, result);
        return result;
    }
    return this.updateExistingWithSequenceData(found, result);
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry)

Example 25 with DatabaseEntry

use of ubic.gemma.model.common.description.DatabaseEntry in project Gemma by PavlidisLab.

the class GeoDomainObjectGenerator method getProjectedAccessions.

/**
 * Determine the set of external accession values that will be generated during parsing. This can be used to
 * pre-empt time-consuming fetch and download of data we already have.
 * <p>
 * For a GSE the accession is used as is; for a GDS the corresponding series accessions are looked up and joined
 * with commas into a single accession string.
 *
 * @param geoAccession geo accession; must start with GSE, GDS or GPL
 * @return a collection holding one "GEO" database entry for the series accession(s), or {@code null} for a
 *         GPL accession, for which this check is not implemented
 * @throws InvalidAccessionException if the accession is null or blank, is not a GSE, GDS or GPL, or is a GDS
 *                                   for which no series is found
 */
public Collection<DatabaseEntry> getProjectedAccessions(String geoAccession) {
    // Validate up front so that a null accession is reported as invalid rather than failing with a
    // NullPointerException on startsWith below.
    if (StringUtils.isBlank(geoAccession)) {
        throw new InvalidAccessionException("GEO accession must not be blank. Enter a  GSE, GDS or GPL");
    }

    String seriesAccession;
    if (geoAccession.startsWith("GSE")) {
        seriesAccession = geoAccession;
    } else if (geoAccession.startsWith("GPL")) {
        GeoDomainObjectGenerator.log.warn("Determining if the data already exist for a GPL (" + geoAccession + ") is not implemented.");
        return null;
    } else if (geoAccession.startsWith("GDS")) {
        Collection<String> seriesAccessions = DatasetCombiner.findGSEforGDS(geoAccession);
        if (seriesAccessions == null || seriesAccessions.size() == 0) {
            throw new InvalidAccessionException("There is no series (GSE) for the accession " + geoAccession);
        }
        // Comma-separated list of all series for this dataset.
        StringBuilder joined = new StringBuilder();
        for (String gse : seriesAccessions) {
            joined.append(gse).append(",");
        }
        seriesAccession = StringUtils.removeEnd(joined.toString(), ",");
    } else {
        throw new InvalidAccessionException("'" + geoAccession + "' is not understood by Gemma; must be a GSE, GDS or GPL. Did you choose the right source database?");
    }

    ExternalDatabase ed = ExternalDatabase.Factory.newInstance();
    ed.setName("GEO");
    DatabaseEntry de = DatabaseEntry.Factory.newInstance(ed);
    de.setAccession(seriesAccession);

    Collection<DatabaseEntry> accessions = new HashSet<>();
    accessions.add(de);
    return accessions;
}
Also used : ExternalDatabase(ubic.gemma.model.common.description.ExternalDatabase) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) HashSet(java.util.HashSet)

Aggregations

DatabaseEntry (ubic.gemma.model.common.description.DatabaseEntry): 37; ExternalDatabase (ubic.gemma.model.common.description.ExternalDatabase): 11; GeneProduct (ubic.gemma.model.genome.gene.GeneProduct): 8; HashSet (java.util.HashSet): 6; Test (org.junit.Test): 6; ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment): 6; Gene (ubic.gemma.model.genome.Gene): 6; BioSequence (ubic.gemma.model.genome.biosequence.BioSequence): 6; Taxon (ubic.gemma.model.genome.Taxon): 5; BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest): 4; BioSequence2GeneProduct (ubic.gemma.model.association.BioSequence2GeneProduct): 4; CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence): 4; HashMap (java.util.HashMap): 3; BibliographicReference (ubic.gemma.model.common.description.BibliographicReference): 3; AnnotationAssociation (ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation): 3; IOException (java.io.IOException): 2; ArrayList (java.util.ArrayList): 2; Before (org.junit.Before): 2; AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException): 2; BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay): 2