Search in sources :

Example 6 with ExternalDatabase

use of ubic.gemma.model.common.description.ExternalDatabase in project Gemma by PavlidisLab.

the class GeneServiceTest method testFindByAccessionOther.

/**
 * Checks that a gene can be retrieved through an accession in a non-NCBI external database:
 * a gene carrying an "Ensembl" database entry is created and then looked up by that accession.
 */
@Test
public void testFindByAccessionOther() {
    final String accession = "E129458";

    Gene gene = Gene.Factory.newInstance();
    Integer id = Integer.parseInt(RandomStringUtils.randomNumeric(5));
    gene.setNcbiGeneId(id);
    gene.setName(GeneServiceTest.TEST_GENE_NAME);

    ExternalDatabase ensembl = edbs.findByName("Ensembl");
    DatabaseEntry dbe = DatabaseEntry.Factory.newInstance();
    dbe.setAccession(accession);
    dbe.setExternalDatabase(ensembl);
    gene.getAccessions().add(dbe);

    Taxon human = taxonService.findByCommonName("human");
    gene.setTaxon(human);

    geneDao.create(gene);
    try {
        Gene found = geneDao.findByAccession(accession, ensembl);
        assertNotNull(found);
        // JUnit convention is (expected, actual); the created gene is the expected value.
        assertEquals(gene, found);
    } finally {
        // Remove the fixture even when an assertion fails so it cannot leak into other tests.
        geneDao.remove(gene);
    }
}
Also used : Gene(ubic.gemma.model.genome.Gene) ExternalDatabase(ubic.gemma.model.common.description.ExternalDatabase) Taxon(ubic.gemma.model.genome.Taxon) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) Test(org.junit.Test) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest)

Example 7 with ExternalDatabase

use of ubic.gemma.model.common.description.ExternalDatabase in project Gemma by PavlidisLab.

the class ArrayDesignSequenceAlignmentServiceImpl method processArrayDesign.

/**
 * Replaces the stored alignment data of an array design with the supplied BLAT results.
 * <p>
 * Existing alignment data for the design is deleted first. Each raw result is then attached to the
 * searched genome database for the taxon and, when its query sequence has no id, re-pointed (by
 * sequence name) at the matching sequence of the array design; results whose query sequence name is
 * not found on the design are dropped. Golden path alignments for the design's sequences are added,
 * everything is persisted, and the array design report is regenerated.
 * <p>
 * Note that {@code rawBlatResults} is modified in place: skipped results are removed from it and
 * golden path alignments are added to it.
 *
 * @param ad             the array design whose alignments are being replaced
 * @param taxon          the taxon the BLAT results belong to; passed through
 *                       {@code validateTaxaForBlatFile} before use
 * @param rawBlatResults the BLAT results to process; must be a modifiable collection
 * @return the collection returned by {@code persistBlatResults}
 */
@Override
public Collection<BlatResult> processArrayDesign(ArrayDesign ad, Taxon taxon, Collection<BlatResult> rawBlatResults) {
    ArrayDesignSequenceAlignmentServiceImpl.log.info("Looking for old results to remove...");
    ad = arrayDesignService.thaw(ad);
    arrayDesignService.deleteAlignmentData(ad);
    // Blat file processing can only be run on one taxon at a time
    taxon = this.validateTaxaForBlatFile(ad, taxon);
    Collection<BioSequence> sequencesToBlat = ArrayDesignSequenceAlignmentServiceImpl.getSequences(ad);
    sequencesToBlat = bioSequenceService.thaw(sequencesToBlat);
    // if the blat results were loaded from a file, we have to replace the
    // query sequences with the actual ones
    // attached to the array design. We have to do this by name because the
    // sequence name is what the files contain.
    // Note that if there is ambiguity there will be problems!
    Map<String, BioSequence> seqMap = new HashMap<>();
    for (BioSequence bioSequence : sequencesToBlat) {
        seqMap.put(bioSequence.getName(), bioSequence);
    }
    ExternalDatabase searchedDatabase = ShellDelegatingBlat.getSearchedGenome(taxon);
    Collection<BlatResult> toSkip = new HashSet<>();
    for (BlatResult result : rawBlatResults) {
        /*
         * If the sequences don't have ids, replace them with the actual sequences associated with the array design.
         */
        if (result.getQuerySequence().getId() == null) {
            String querySeqName = result.getQuerySequence().getName();
            BioSequence actualSequence = seqMap.get(querySeqName);
            if (actualSequence == null) {
                ArrayDesignSequenceAlignmentServiceImpl.log.debug("Array design does not contain a sequence with name " + querySeqName);
                toSkip.add(result);
                continue;
            }
            result.setQuerySequence(actualSequence);
        } else {
            result.getQuerySequence().setTaxon(taxon);
        }
        result.setSearchedDatabase(searchedDatabase);
        try {
            // The chromosome's taxon is written reflectively (forced access) rather than through a setter.
            FieldUtils.writeField(result.getTargetChromosome(), "taxon", taxon, true);
        } catch (IllegalAccessException e) {
            // Still best-effort, but report through the class logger (with the cause) instead of stderr.
            ArrayDesignSequenceAlignmentServiceImpl.log.error("Could not set the taxon on the target chromosome of a blat result", e);
        }
        result.getTargetChromosome().getSequence().setTaxon(taxon);
    }
    if (!toSkip.isEmpty()) {
        ArrayDesignSequenceAlignmentServiceImpl.log.warn(toSkip.size() + " blat results were for sequences not on " + ad + "; they will be ignored.");
        rawBlatResults.removeAll(toSkip);
    }
    Map<BioSequence, Collection<BlatResult>> goldenPathAlignments = new HashMap<>();
    this.getGoldenPathAlignments(sequencesToBlat, taxon, goldenPathAlignments);
    // Only the alignments are needed here, so iterate the values directly.
    for (Collection<BlatResult> alignments : goldenPathAlignments.values()) {
        rawBlatResults.addAll(alignments);
    }
    Collection<BlatResult> results = this.persistBlatResults(rawBlatResults);
    arrayDesignReportService.generateArrayDesignReport(ad.getId());
    return results;
}
Also used : ExternalDatabase(ubic.gemma.model.common.description.ExternalDatabase) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) HashMap(java.util.HashMap) Collection(java.util.Collection) BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult) HashSet(java.util.HashSet)

Example 8 with ExternalDatabase

use of ubic.gemma.model.common.description.ExternalDatabase in project Gemma by PavlidisLab.

the class GeoBrowserServiceImpl method filterGeoRecords.

/**
 * Filters the given GEO records in place and annotates the ones that are kept.
 * <p>
 * A record is removed when it has fewer than {@code MIN_SAMPLES} samples, or when one of its
 * organisms cannot be resolved by the taxon service (by common name, then by scientific name).
 * For each organism processed, the record is linked to the ids of experiments found under the
 * record's GEO accession (suffixed with {@code "." + index} when the record lists more than one
 * organism), and its previous-click count and usable flag are set from {@code localInfo}.
 *
 * @param records the records to filter; this list is modified and returned
 * @return the same list instance, with rejected records removed
 */
private List<GeoRecord> filterGeoRecords(List<GeoRecord> records) {
    ExternalDatabase geo = externalDatabaseService.findByName("GEO");
    Collection<GeoRecord> rejected = new HashSet<>();
    assert geo != null;
    nextRecord: for (GeoRecord record : records) {
        // Note: a record with too few samples is still processed below; it is only dropped at the end.
        if (record.getNumSamples() < GeoBrowserServiceImpl.MIN_SAMPLES) {
            rejected.add(record);
        }
        Collection<String> organisms = record.getOrganisms();
        if (organisms == null || organisms.isEmpty()) {
            continue;
        }
        int organismIndex = 0;
        for (String organism : organisms) {
            // The taxon itself is not needed, only whether either lookup resolves it.
            boolean taxonKnown = taxonService.findByCommonName(organism) != null
                    || taxonService.findByScientificName(organism) != null;
            if (!taxonKnown) {
                rejected.add(record);
                continue nextRecord;
            }
            String acc = record.getGeoAccession();
            if (organisms.size() > 1) {
                acc = acc + "." + organismIndex;
            }
            DatabaseEntry de = DatabaseEntry.Factory.newInstance();
            de.setExternalDatabase(geo);
            de.setAccession(acc);
            for (ExpressionExperiment experiment : expressionExperimentService.findByAccession(de)) {
                record.getCorrespondingExperiments().add(experiment.getId());
            }
            boolean hasLocalInfo = localInfo.containsKey(acc);
            record.setPreviousClicks(hasLocalInfo ? localInfo.get(acc).getPreviousClicks() : 0);
            record.setUsable(!hasLocalInfo || localInfo.get(acc).isUsable());
            organismIndex++;
        }
    }
    records.removeAll(rejected);
    return records;
}
Also used : GeoRecord(ubic.gemma.core.loader.expression.geo.model.GeoRecord) ExternalDatabase(ubic.gemma.model.common.description.ExternalDatabase) Taxon(ubic.gemma.model.genome.Taxon) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment)

Example 9 with ExternalDatabase

use of ubic.gemma.model.common.description.ExternalDatabase in project Gemma by PavlidisLab.

the class PhenotypeAssociationDaoImpl method loadStatisticsOnExternalDatabases.

/**
 * Builds one statistics value object per external database that is an evidence source of
 * phenotype associations.
 * <p>
 * Four HQL queries are run: the first creates the value objects (description, name, download file
 * path, last update date, web URI, evidence count); the following three fill in the number of
 * genes, phenotypes and publications, matching rows to value objects by external database name.
 *
 * @param downloadPath prefix for each download file path; the database name with spaces removed
 *                     and a {@code .tsv} suffix is appended to it
 * @return the value objects, in the order in which the first query returned their databases
 */
@Override
public Collection<ExternalDatabaseStatisticsValueObject> loadStatisticsOnExternalDatabases(String downloadPath) {
    // Insertion-ordered so that the "order by" of the first query is still visible in the returned values
    // (LinkedHashMap is a HashMap subclass; fully qualified because the file's imports are not changed here).
    HashMap<String, ExternalDatabaseStatisticsValueObject> externalDatabasesStatistics = new java.util.LinkedHashMap<>();
    // noinspection unchecked
    List<Object[]> numEvidence = this.getHibernateTemplate().find("select p.evidenceSource.externalDatabase, count (*), p.lastUpdated from PhenotypeAssociation " + "as p group by p.evidenceSource.externalDatabase order by p.lastUpdated desc");
    for (Object[] o : numEvidence) {
        ExternalDatabase externalDatabase = (ExternalDatabase) o[0];
        Long count = (Long) o[1];
        ExternalDatabaseStatisticsValueObject externalDatabaseStatistics = new ExternalDatabaseStatisticsValueObject();
        externalDatabaseStatistics.setDescription(externalDatabase.getDescription());
        externalDatabaseStatistics.setName(externalDatabase.getName());
        // Plain (non-regex) replacement is enough to strip spaces from the file name.
        externalDatabaseStatistics.setPathToDownloadFile(downloadPath + externalDatabase.getName().replace(" ", "") + ".tsv");
        externalDatabaseStatistics.setLastUpdateDate((Date) o[2]);
        externalDatabaseStatistics.setWebUri(externalDatabase.getWebUri());
        externalDatabaseStatistics.setNumEvidence(count);
        externalDatabasesStatistics.put(externalDatabase.getName(), externalDatabaseStatistics);
    }
    // noinspection unchecked
    List<Object[]> numGenes = this.getHibernateTemplate().find("select p.evidenceSource.externalDatabase.name, count (distinct g) from Gene as g join g.phenotypeAssociations " + "as p group by p.evidenceSource.externalDatabase");
    for (Object[] o : numGenes) {
        String externalDatabaseName = (String) o[0];
        externalDatabasesStatistics.get(externalDatabaseName).setNumGenes((Long) o[1]);
    }
    // noinspection unchecked
    List<Object[]> numPhenotypes = this.getHibernateTemplate().find("select p.evidenceSource.externalDatabase.name, count (distinct c.valueUri) " + "from PhenotypeAssociation as p join p.phenotypes as c " + "group by p.evidenceSource.externalDatabase");
    for (Object[] o : numPhenotypes) {
        String externalDatabaseName = (String) o[0];
        externalDatabasesStatistics.get(externalDatabaseName).setNumPhenotypes((Long) o[1]);
    }
    // noinspection unchecked
    List<Object[]> numPublications = this.getHibernateTemplate().find("select p.evidenceSource.externalDatabase.name, count (distinct pub.citation.pubAccession.accession) " + "from PhenotypeAssociation as p join p.phenotypeAssociationPublications as pub" + " group by p.evidenceSource.externalDatabase");
    for (Object[] o : numPublications) {
        String externalDatabaseName = (String) o[0];
        externalDatabasesStatistics.get(externalDatabaseName).addNumPublications((Long) o[1]);
    }
    return externalDatabasesStatistics.values();
}
Also used : ExternalDatabaseStatisticsValueObject(ubic.gemma.model.genome.gene.phenotype.valueObject.ExternalDatabaseStatisticsValueObject) ExternalDatabase(ubic.gemma.model.common.description.ExternalDatabase) ExternalDatabaseStatisticsValueObject(ubic.gemma.model.genome.gene.phenotype.valueObject.ExternalDatabaseStatisticsValueObject) GeneValueObject(ubic.gemma.model.genome.gene.GeneValueObject) PhenotypeValueObject(ubic.gemma.model.genome.gene.phenotype.valueObject.PhenotypeValueObject) GeneEvidenceValueObject(ubic.gemma.model.genome.gene.phenotype.valueObject.GeneEvidenceValueObject) CharacteristicValueObject(ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject)

Example 10 with ExternalDatabase

use of ubic.gemma.model.common.description.ExternalDatabase in project Gemma by PavlidisLab.

the class ExpressionExperimentBibRefFinder method locatePrimaryReference.

/**
 * Finds the primary publication of an expression experiment.
 * <p>
 * If the experiment already has a primary publication, that one is returned. Otherwise, for
 * experiments whose accession belongs to the external database named "GEO", a PubMed id is looked
 * up from the GEO accession and the reference is fetched with a {@code PubMedXMLFetcher}.
 *
 * @param ee the experiment to find a reference for
 * @return the reference, or {@code null} when the experiment has no GEO accession (including no
 *         accession or external database at all) or when the PubMed id lookup returns a negative value
 */
public BibliographicReference locatePrimaryReference(ExpressionExperiment ee) {
    BibliographicReference existing = ee.getPrimaryPublication();
    if (existing != null) {
        return existing;
    }
    DatabaseEntry accession = ee.getAccession();
    ExternalDatabase ed = accession == null ? null : accession.getExternalDatabase();
    // A missing accession, database or database name is handled like any other non-GEO source
    // instead of failing with a NullPointerException.
    if (ed == null || !"GEO".equals(ed.getName())) {
        ExpressionExperimentBibRefFinder.log.warn("Don't know how to get references for non-GEO data sets");
        return null;
    }
    String geoId = accession.getAccession();
    int pubMedId = this.locatePubMedId(geoId);
    if (pubMedId < 0) {
        return null;
    }
    PubMedXMLFetcher fetcher = new PubMedXMLFetcher();
    return fetcher.retrieveByHTTP(pubMedId);
}
Also used : ExternalDatabase(ubic.gemma.model.common.description.ExternalDatabase) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry)

Aggregations

ExternalDatabase (ubic.gemma.model.common.description.ExternalDatabase): 23; DatabaseEntry (ubic.gemma.model.common.description.DatabaseEntry): 11; Taxon (ubic.gemma.model.genome.Taxon): 5; Test (org.junit.Test): 4; ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment): 4; HashMap (java.util.HashMap): 3; Gene (ubic.gemma.model.genome.Gene): 3; BioSequence (ubic.gemma.model.genome.biosequence.BioSequence): 3; BlatResult (ubic.gemma.model.genome.sequenceAnalysis.BlatResult): 3; IOException (java.io.IOException): 2; Collection (java.util.Collection): 2; HashSet (java.util.HashSet): 2; Before (org.junit.Before): 2; BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest): 2; BibliographicReference (ubic.gemma.model.common.description.BibliographicReference): 2; ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign): 2; JSONObject (com.sdicons.json.model.JSONObject): 1; File (java.io.File): 1; ArrayList (java.util.ArrayList): 1; GeoRecord (ubic.gemma.core.loader.expression.geo.model.GeoRecord): 1