use of ubic.gemma.model.common.description.ExternalDatabase in project Gemma by PavlidisLab.
the class GeneServiceTest method testFindByAccessionOther.
/**
 * Verifies that a gene can be retrieved through an accession registered against the
 * "Ensembl" external database.
 * <p>
 * A gene carrying a random five-digit NCBI id and a single database entry with accession
 * {@code E129458} is created, looked up again via {@code geneDao.findByAccession}, and
 * removed afterwards.
 */
@Test
public void testFindByAccessionOther() {
    Gene gene = Gene.Factory.newInstance();
    Integer id = Integer.parseInt(RandomStringUtils.randomNumeric(5));
    gene.setNcbiGeneId(id);
    gene.setName(GeneServiceTest.TEST_GENE_NAME);

    ExternalDatabase ensembl = edbs.findByName("Ensembl");
    DatabaseEntry dbe = DatabaseEntry.Factory.newInstance();
    dbe.setAccession("E129458");
    dbe.setExternalDatabase(ensembl);
    gene.getAccessions().add(dbe);

    Taxon human = taxonService.findByCommonName("human");
    gene.setTaxon(human);

    geneDao.create(gene);
    try {
        Gene g = geneDao.findByAccession("E129458", ensembl);
        assertNotNull(g);
        // expected value first, so a failure message reports the two sides correctly
        assertEquals(gene, g);
    } finally {
        // always clean up, even when an assertion above fails
        geneDao.remove(gene);
    }
}
use of ubic.gemma.model.common.description.ExternalDatabase in project Gemma by PavlidisLab.
the class ArrayDesignSequenceAlignmentServiceImpl method processArrayDesign.
/**
 * Replaces the alignment data of an array design with the supplied BLAT results.
 * <p>
 * Existing alignment data for the design is deleted first. Each raw result is then tied to
 * the design: results whose query sequence has no id get that sequence swapped, by name, for
 * the matching sequence of the design; results whose query sequence name is not on the design
 * are dropped. The searched database and the taxon are set on every retained result, the
 * alignments gathered by {@code getGoldenPathAlignments} are appended, everything is
 * persisted, and the array design report is regenerated.
 * <p>
 * Note that {@code rawBlatResults} is modified in place (skipped results are removed and
 * golden path alignments are added), so it must be a modifiable collection.
 *
 * @param ad             the array design to process; it is thawed before use
 * @param taxon          the taxon the BLAT results are for; it is passed through
 *                       {@code validateTaxaForBlatFile} before use
 * @param rawBlatResults the BLAT results to attach to the design
 * @return whatever {@code persistBlatResults} returns for the final set of results
 */
@Override
public Collection<BlatResult> processArrayDesign(ArrayDesign ad, Taxon taxon, Collection<BlatResult> rawBlatResults) {
    ArrayDesignSequenceAlignmentServiceImpl.log.info("Looking for old results to remove...");
    ad = arrayDesignService.thaw(ad);
    arrayDesignService.deleteAlignmentData(ad);

    // Blat file processing can only be run on one taxon at a time
    taxon = this.validateTaxaForBlatFile(ad, taxon);

    Collection<BioSequence> sequencesToBlat = ArrayDesignSequenceAlignmentServiceImpl.getSequences(ad);
    sequencesToBlat = bioSequenceService.thaw(sequencesToBlat);

    // if the blat results were loaded from a file, we have to replace the
    // query sequences with the actual ones
    // attached to the array design. We have to do this by name because the
    // sequence name is what the files contain.
    // Note that if there is ambiguity there will be problems!
    Map<String, BioSequence> seqMap = new HashMap<>();
    for (BioSequence bioSequence : sequencesToBlat) {
        seqMap.put(bioSequence.getName(), bioSequence);
    }

    ExternalDatabase searchedDatabase = ShellDelegatingBlat.getSearchedGenome(taxon);
    Collection<BlatResult> toSkip = new HashSet<>();
    for (BlatResult result : rawBlatResults) {
        /*
         * If the sequences don't have ids, replace them with the actual sequences associated with the array design.
         */
        if (result.getQuerySequence().getId() == null) {
            String querySeqName = result.getQuerySequence().getName();
            BioSequence actualSequence = seqMap.get(querySeqName);
            if (actualSequence == null) {
                ArrayDesignSequenceAlignmentServiceImpl.log.debug("Array design does not contain a sequence with name " + querySeqName);
                toSkip.add(result);
                continue;
            }
            result.setQuerySequence(actualSequence);
        } else {
            result.getQuerySequence().setTaxon(taxon);
        }

        result.setSearchedDatabase(searchedDatabase);

        // The chromosome's taxon is written reflectively (forcing access to the field).
        try {
            FieldUtils.writeField(result.getTargetChromosome(), "taxon", taxon, true);
        } catch (IllegalAccessException e) {
            // Keep going as before, but report through the class logger instead of stderr.
            ArrayDesignSequenceAlignmentServiceImpl.log.error(
                    "Could not set the taxon on the target chromosome of the blat result for query sequence "
                            + result.getQuerySequence().getName(), e);
        }
        result.getTargetChromosome().getSequence().setTaxon(taxon);
    }

    if (!toSkip.isEmpty()) {
        ArrayDesignSequenceAlignmentServiceImpl.log.warn(toSkip.size() + " blat results were for sequences not on " + ad + "; they will be ignored.");
        rawBlatResults.removeAll(toSkip);
    }

    Map<BioSequence, Collection<BlatResult>> goldenPathAlignments = new HashMap<>();
    this.getGoldenPathAlignments(sequencesToBlat, taxon, goldenPathAlignments);
    for (Collection<BlatResult> alignments : goldenPathAlignments.values()) {
        rawBlatResults.addAll(alignments);
    }

    Collection<BlatResult> results = this.persistBlatResults(rawBlatResults);
    arrayDesignReportService.generateArrayDesignReport(ad.getId());
    return results;
}
use of ubic.gemma.model.common.description.ExternalDatabase in project Gemma by PavlidisLab.
the class GeoBrowserServiceImpl method filterGeoRecords.
/**
 * Removes unusable GEO records from the given list and annotates the ones that remain.
 * <p>
 * A record is removed when it has fewer than {@code MIN_SAMPLES} samples, or when one of its
 * organisms cannot be resolved to a taxon by common name or by scientific name. Records with
 * no organisms are kept untouched. For every organism of a retained record, the ids of the
 * experiments found for the record's GEO accession are added to the record, and its click
 * count and usability flag are copied from {@code localInfo} (defaulting to 0 clicks and
 * usable when there is no local entry).
 * <p>
 * When a record lists several organisms, the accession looked up for the organism at
 * position {@code i} is the GEO accession followed by {@code "." + i}.
 * <p>
 * Records rejected for having too few samples are skipped right away, so no taxon or
 * experiment lookups are made for them and they are not annotated.
 *
 * @param records the records to filter; modified in place
 * @return the same list instance, with the rejected records removed
 */
private List<GeoRecord> filterGeoRecords(List<GeoRecord> records) {
    ExternalDatabase geo = externalDatabaseService.findByName("GEO");
    assert geo != null;

    Collection<GeoRecord> toRemove = new HashSet<>();
    rec: for (GeoRecord record : records) {
        if (record.getNumSamples() < GeoBrowserServiceImpl.MIN_SAMPLES) {
            toRemove.add(record);
            // already rejected: no point in querying taxa or experiments for it
            continue;
        }

        Collection<String> organisms = record.getOrganisms();
        if (organisms == null || organisms.isEmpty()) {
            continue;
        }

        int i = 0;
        for (String string : organisms) {
            Taxon t = taxonService.findByCommonName(string);
            if (t == null) {
                t = taxonService.findByScientificName(string);
                if (t == null) {
                    // unknown organism: reject the whole record
                    toRemove.add(record);
                    continue rec;
                }
            }

            String acc = record.getGeoAccession();
            if (organisms.size() > 1) {
                acc = acc + "." + i;
            }

            DatabaseEntry de = DatabaseEntry.Factory.newInstance();
            de.setExternalDatabase(geo);
            de.setAccession(acc);

            Collection<ExpressionExperiment> ee = expressionExperimentService.findByAccession(de);
            for (ExpressionExperiment expressionExperiment : ee) {
                record.getCorrespondingExperiments().add(expressionExperiment.getId());
            }

            record.setPreviousClicks(localInfo.containsKey(acc) ? localInfo.get(acc).getPreviousClicks() : 0);
            record.setUsable(!localInfo.containsKey(acc) || localInfo.get(acc).isUsable());
            i++;
        }
    }

    records.removeAll(toRemove);
    return records;
}
use of ubic.gemma.model.common.description.ExternalDatabase in project Gemma by PavlidisLab.
the class PhenotypeAssociationDaoImpl method loadStatisticsOnExternalDatabases.
/**
 * Builds one statistics object per external database that is an evidence source of a
 * phenotype association.
 * <p>
 * Four queries are run. The first creates the statistics objects (description, name, web
 * URI, download file path, last update date and evidence count); the other three fill in the
 * gene, phenotype and publication counts, matching rows to objects by database name.
 * <p>
 * The download file path is {@code downloadPath}, followed by the database name with spaces
 * removed, followed by {@code ".tsv"}.
 *
 * @param downloadPath prefix prepended to each generated download file name
 * @return the values of the name-to-statistics map, one entry per database name
 */
@Override
public Collection<ExternalDatabaseStatisticsValueObject> loadStatisticsOnExternalDatabases(String downloadPath) {
    HashMap<String, ExternalDatabaseStatisticsValueObject> statsByDbName = new HashMap<>();

    // Evidence count and last update date per database; this pass creates the value objects.
    // NOTE(review): p.lastUpdated is selected without being grouped or aggregated - confirm
    // which row's date the database is expected to return here.
    // noinspection unchecked
    List<Object[]> evidenceRows = this.getHibernateTemplate().find("select p.evidenceSource.externalDatabase, count (*), p.lastUpdated from PhenotypeAssociation " + "as p group by p.evidenceSource.externalDatabase order by p.lastUpdated desc");
    for (Object[] row : evidenceRows) {
        ExternalDatabase database = (ExternalDatabase) row[0];
        Long evidenceCount = (Long) row[1];
        String fileName = database.getName().replaceAll(" ", "") + ".tsv";

        ExternalDatabaseStatisticsValueObject stats = new ExternalDatabaseStatisticsValueObject();
        stats.setName(database.getName());
        stats.setDescription(database.getDescription());
        stats.setWebUri(database.getWebUri());
        stats.setPathToDownloadFile(downloadPath + fileName);
        stats.setLastUpdateDate((Date) row[2]);
        stats.setNumEvidence(evidenceCount);

        statsByDbName.put(database.getName(), stats);
    }

    // Distinct genes per database.
    // noinspection unchecked
    List<Object[]> geneRows = this.getHibernateTemplate().find("select p.evidenceSource.externalDatabase.name, count (distinct g) from Gene as g join g.phenotypeAssociations " + "as p group by p.evidenceSource.externalDatabase");
    for (Object[] row : geneRows) {
        String dbName = (String) row[0];
        ExternalDatabaseStatisticsValueObject stats = statsByDbName.get(dbName);
        stats.setNumGenes((Long) row[1]);
    }

    // Distinct phenotype value URIs per database.
    // noinspection unchecked
    List<Object[]> phenotypeRows = this.getHibernateTemplate().find("select p.evidenceSource.externalDatabase.name, count (distinct c.valueUri) " + "from PhenotypeAssociation as p join p.phenotypes as c " + "group by p.evidenceSource.externalDatabase");
    for (Object[] row : phenotypeRows) {
        String dbName = (String) row[0];
        ExternalDatabaseStatisticsValueObject stats = statsByDbName.get(dbName);
        stats.setNumPhenotypes((Long) row[1]);
    }

    // Distinct publication accessions per database.
    // noinspection unchecked
    List<Object[]> publicationRows = this.getHibernateTemplate().find("select p.evidenceSource.externalDatabase.name, count (distinct pub.citation.pubAccession.accession) " + "from PhenotypeAssociation as p join p.phenotypeAssociationPublications as pub" + " group by p.evidenceSource.externalDatabase");
    for (Object[] row : publicationRows) {
        String dbName = (String) row[0];
        ExternalDatabaseStatisticsValueObject stats = statsByDbName.get(dbName);
        stats.addNumPublications((Long) row[1]);
    }

    return statsByDbName.values();
}
use of ubic.gemma.model.common.description.ExternalDatabase in project Gemma by PavlidisLab.
the class ExpressionExperimentBibRefFinder method locatePrimaryReference.
/**
 * Finds the primary publication of an expression experiment.
 * <p>
 * If the experiment already has a primary publication, that one is returned. Otherwise the
 * experiment's accession is used to look up a PubMed id via {@code locatePubMedId}, and the
 * reference is fetched with a {@code PubMedXMLFetcher}. Only accessions from the external
 * database named "GEO" are supported.
 *
 * @param ee the experiment to find a reference for
 * @return the primary reference, or {@code null} when the experiment has no GEO accession
 *         (including no accession, external database or database name at all) or when
 *         {@code locatePubMedId} returns a negative id
 */
public BibliographicReference locatePrimaryReference(ExpressionExperiment ee) {
    BibliographicReference primaryPublication = ee.getPrimaryPublication();
    if (primaryPublication != null) {
        return primaryPublication;
    }

    DatabaseEntry accession = ee.getAccession();
    ExternalDatabase ed = accession == null ? null : accession.getExternalDatabase();
    // A missing accession, database or database name is handled like any other non-GEO source
    // instead of failing with a NullPointerException.
    if (ed == null || !"GEO".equals(ed.getName())) {
        ExpressionExperimentBibRefFinder.log.warn("Don't know how to get references for non-GEO data sets");
        return null;
    }

    String geoId = accession.getAccession();
    int pubMedId = this.locatePubMedId(geoId);
    if (pubMedId < 0) {
        return null;
    }

    PubMedXMLFetcher fetcher = new PubMedXMLFetcher();
    return fetcher.retrieveByHTTP(pubMedId);
}
Aggregations