Use of ubic.gemma.model.expression.experiment.ExpressionExperiment in project Gemma by PavlidisLab:
the class GeoBrowserServiceImpl, method filterGeoRecords.
/**
 * Filter GEO records: drop records with fewer than {@code MIN_SAMPLES} samples or containing an
 * organism Gemma has no taxon for, and annotate the surviving records with the IDs of any
 * already-loaded experiments plus locally stored click/usability information.
 *
 * @param records records to filter; the list is modified in place
 * @return the same list with rejected records removed
 */
private List<GeoRecord> filterGeoRecords(List<GeoRecord> records) {
    ExternalDatabase geo = externalDatabaseService.findByName("GEO");
    assert geo != null;
    Collection<GeoRecord> toRemove = new HashSet<>();
    rec: for (GeoRecord record : records) {
        // Too few samples: reject immediately. Skipping the rest of the loop avoids
        // needless taxon lookups and accession queries for a record we are discarding.
        if (record.getNumSamples() < GeoBrowserServiceImpl.MIN_SAMPLES) {
            toRemove.add(record);
            continue;
        }
        Collection<String> organisms = record.getOrganisms();
        if (organisms == null || organisms.isEmpty()) {
            continue;
        }
        int i = 0;
        for (String organism : organisms) {
            // Reject the whole record if any of its organisms is unknown to Gemma.
            Taxon t = taxonService.findByCommonName(organism);
            if (t == null) {
                t = taxonService.findByScientificName(organism);
                if (t == null) {
                    toRemove.add(record);
                    continue rec;
                }
            }
            // Multi-organism series are stored per-organism with a ".<index>" suffix.
            String acc = record.getGeoAccession();
            if (organisms.size() > 1) {
                acc = acc + "." + i;
            }
            DatabaseEntry de = DatabaseEntry.Factory.newInstance();
            de.setExternalDatabase(geo);
            de.setAccession(acc);
            // Link the record to any experiments already loaded under this accession.
            Collection<ExpressionExperiment> ees = expressionExperimentService.findByAccession(de);
            for (ExpressionExperiment expressionExperiment : ees) {
                record.getCorrespondingExperiments().add(expressionExperiment.getId());
            }
            record.setPreviousClicks(localInfo.containsKey(acc) ? localInfo.get(acc).getPreviousClicks() : 0);
            record.setUsable(!localInfo.containsKey(acc) || localInfo.get(acc).isUsable());
            i++;
        }
    }
    records.removeAll(toRemove);
    return records;
}
Use of ubic.gemma.model.expression.experiment.ExpressionExperiment in project Gemma by PavlidisLab:
the class GeoServiceImpl, method updateReports.
/**
 * Regenerate cached reports for freshly loaded entities: an experiment summary for each
 * ExpressionExperiment, and an array design report for every platform involved — whether
 * passed in directly or referenced by a loaded experiment's bioassays.
 *
 * @param entities a mix of ExpressionExperiment and/or ArrayDesign instances
 */
private void updateReports(Collection<?> entities) {
    Collection<ArrayDesign> platformsToUpdate = new HashSet<>();
    for (Object entity : entities) {
        if (entity instanceof ExpressionExperiment) {
            ExpressionExperiment ee = (ExpressionExperiment) entity;
            ee = this.expressionExperimentService.thaw(ee);
            this.expressionExperimentReportService.generateSummary(ee.getId());
            // Re-thaw after summary generation before walking the bioassays.
            ee = this.expressionExperimentService.thaw(ee);
            for (BioAssay assay : ee.getBioAssays()) {
                platformsToUpdate.add(assay.getArrayDesignUsed());
            }
        } else if (entity instanceof ArrayDesign) {
            platformsToUpdate.add((ArrayDesign) entity);
        }
    }
    for (ArrayDesign platform : platformsToUpdate) {
        this.arrayDesignReportService.generateArrayDesignReport(platform.getId());
    }
}
Use of ubic.gemma.model.expression.experiment.ExpressionExperiment in project Gemma by PavlidisLab:
the class GeoServiceImpl, method getPubMedInfo.
/**
 * Refresh the primary-publication details of each experiment from PubMed. Experiments with no
 * primary publication are skipped; fetch failures are logged and the existing publication
 * record is kept unchanged.
 *
 * @param result experiments to update in place
 */
private void getPubMedInfo(Collection<ExpressionExperiment> result) {
    // One fetcher suffices for the whole batch; no need to construct one per experiment.
    PubMedXMLFetcher fetcher = new PubMedXMLFetcher();
    for (ExpressionExperiment experiment : result) {
        BibliographicReference pubmed = experiment.getPrimaryPublication();
        if (pubmed == null)
            continue;
        try {
            pubmed = fetcher.retrieveByHTTP(Integer.parseInt(pubmed.getPubAccession().getAccession()));
        } catch (Exception e) {
            AbstractGeoService.log.warn("Failed to get data from pubmed, continuing without it.");
            AbstractGeoService.log.error(e, e);
            continue; // keep the existing publication record
        }
        if (pubmed == null)
            continue;
        experiment.setPrimaryPublication(pubmed);
    }
}
Use of ubic.gemma.model.expression.experiment.ExpressionExperiment in project Gemma by PavlidisLab:
the class GeoServiceImpl, method fetchAndLoad.
/**
 * Given a GEO GSE or GDS (or GPL, but support might not be complete)
 * <ol>
 * <li>Check that it doesn't already exist in the system</li>
 * <li>Download and parse GDS files and GSE file needed</li>
 * <li>Convert the GDS and GSE into a ExpressionExperiment (or just the ArrayDesigns)
 * <li>Load the resulting ExpressionExperiment and/or ArrayDesigns into Gemma</li>
 * </ol>
 *
 * @param geoAccession          GEO accession (GSE, GDS or GPL)
 * @param loadPlatformOnly      only load the platform(s), not the experiment data
 * @param doSampleMatching      attempt to match samples across platforms (ignored when splitting)
 * @param splitByPlatform       split a multi-platform series into one experiment per platform
 * @param allowSuperSeriesImport permit loading a GEO SuperSeries as a single experiment
 * @param allowSubSeriesImport  permit loading a GEO SubSeries directly
 * @return the persisted ExpressionExperiments (or ArrayDesigns when loading platforms only);
 *         null when GEO returned no results for the accession
 * @throws RuntimeException      if the accession resolves to something other than a series
 * @throws IllegalStateException if a super/subseries is encountered without the matching flag
 */
@Override
public Collection<?> fetchAndLoad(String geoAccession, boolean loadPlatformOnly, boolean doSampleMatching, boolean splitByPlatform, boolean allowSuperSeriesImport, boolean allowSubSeriesImport) {
    /*
     * We do this to get a fresh instantiation of GeoConverter (prototype scope)
     */
    GeoConverter geoConverter = (GeoConverter) this.beanFactory.getBean("geoConverter");
    if (this.geoDomainObjectGenerator == null) {
        this.geoDomainObjectGenerator = new GeoDomainObjectGenerator();
    } else {
        this.geoDomainObjectGenerator.initialize();
    }
    geoDomainObjectGenerator.setProcessPlatformsOnly(geoAccession.startsWith("GPL") || loadPlatformOnly);
    // Sample matching is meaningless when each platform becomes its own experiment.
    geoDomainObjectGenerator.setDoSampleMatching(doSampleMatching && !splitByPlatform);
    Collection<DatabaseEntry> projectedAccessions = geoDomainObjectGenerator.getProjectedAccessions(geoAccession);
    this.checkForExisting(projectedAccessions);
    if (loadPlatformOnly) {
        Collection<? extends GeoData> platforms = geoDomainObjectGenerator.generate(geoAccession);
        if (platforms.size() == 0) {
            AbstractGeoService.log.warn("Got no results for platform " + geoAccession);
            // NOTE: null (not an empty collection) is this method's established "no results" contract.
            return null;
        }
        geoConverter.setForceConvertElements(true);
        Collection<Object> arrayDesigns = geoConverter.convert(platforms);
        return persisterHelper.persist(arrayDesigns);
    }
    Collection<? extends GeoData> parseResult = geoDomainObjectGenerator.generate(geoAccession);
    if (parseResult.size() == 0) {
        AbstractGeoService.log.warn("Got no results");
        return null;
    }
    AbstractGeoService.log.debug("Generated GEO domain objects for " + geoAccession);
    Object obj = parseResult.iterator().next();
    if (!(obj instanceof GeoSeries)) {
        throw new RuntimeException("Got a " + obj.getClass().getName() + " instead of a " + GeoSeries.class.getName() + " (you may need to load platforms only).");
    }
    GeoSeries series = (GeoSeries) obj;
    String seriesAccession = series.getGeoAccession();
    if (series.isSuperSeries()) {
        if (allowSuperSeriesImport) {
            AbstractGeoService.log.info(" ========= SuperSeries Detected! =========");
            AbstractGeoService.log.info("Please make sure you want to import this as a superseries and not the individual subseries");
        } else {
            throw new IllegalStateException("SuperSeries detected, set 'allowSuperSeriesImport' to 'true' to allow this dataset to load");
        }
    }
    if (series.isSubSeries()) {
        if (allowSubSeriesImport) {
            AbstractGeoService.log.info(" ========= Subseries Detected! =========");
            AbstractGeoService.log.info("Please make sure you want to import this as a subseries and not the superseries");
        } else {
            throw new IllegalStateException("SubSeries detected, set 'allowSubSeriesImport' to 'true' to allow this dataset to load");
        }
    }
    this.confirmPlatformUniqueness(series, doSampleMatching && !splitByPlatform);
    ArrayDesignsForExperimentCache c = new ArrayDesignsForExperimentCache();
    this.matchToExistingPlatforms(geoConverter, series, c);
    this.checkSamplesAreNew(series);
    this.getSubSeriesInformation(series);
    geoConverter.clear();
    geoConverter.setSplitByPlatform(splitByPlatform);
    // noinspection unchecked
    Collection<ExpressionExperiment> result = (Collection<ExpressionExperiment>) geoConverter.convert(series);
    this.check(result);
    this.getPubMedInfo(result);
    AbstractGeoService.log.debug("Converted " + seriesAccession);
    assert persisterHelper != null;
    Collection<ExpressionExperiment> persistedResult = new HashSet<>();
    for (ExpressionExperiment ee : result) {
        // Each experiment is persisted with a platform cache primed by prePersist.
        c = expressionExperimentPrePersistService.prepare(ee, c);
        ee = persisterHelper.persist(ee, c);
        persistedResult.add(ee);
        AbstractGeoService.log.debug("Persisted " + seriesAccession);
    }
    this.updateReports(persistedResult);
    return persistedResult;
}
Use of ubic.gemma.model.expression.experiment.ExpressionExperiment in project Gemma by PavlidisLab:
the class ProcessedExpressionDataVectorDaoImpl, method handleGetProcessedExpressionDataArrays.
/**
 * This is an important method for fetching vectors. Retrieves processed expression data vectors
 * for the given genes across the given experiments, serving from the cache where possible and
 * querying the database only for the (experiment, gene) combinations not yet cached. Newly
 * fetched vectors are added to the cache before being returned, and results are sliced to
 * subsets where the input BioAssaySets are subsets.
 *
 * @param genes genes (IDs) to fetch vectors for
 * @param ees experiments (or subsets) to fetch vectors from
 * @return vectors, possibly subsetted; empty (never null) when nothing is found.
 */
private Collection<DoubleVectorValueObject> handleGetProcessedExpressionDataArrays(Collection<? extends BioAssaySet> ees, Collection<Long> genes) {
// ees must be thawed first as currently implemented (?)
Collection<DoubleVectorValueObject> results = new HashSet<>();
/*
 * Check the cache. checkCache fills 'results' with hits and leaves the experiments/genes
 * still needing a database query in 'needToSearch'/'genesToSearch'.
 */
Collection<ExpressionExperiment> needToSearch = new HashSet<>();
Collection<Long> genesToSearch = new HashSet<>();
this.checkCache(ees, genes, results, needToSearch, genesToSearch);
AbstractDao.log.info("Using " + results.size() + " DoubleVectorValueObject(s) from cache");
if (needToSearch.size() == 0) {
return results;
}
/*
 * Get items not in the cache.
 */
AbstractDao.log.info("Searching for vectors for " + genes.size() + " genes from " + needToSearch.size() + " experiments not in cache");
// Platforms used by the experiments, needed to map genes to composite sequences (probes).
Collection<ArrayDesign> arrays = CommonQueries.getArrayDesignsUsed(EntityUtils.getIds(this.getExperiments(ees)), this.getSessionFactory().getCurrentSession()).keySet();
assert !arrays.isEmpty();
Map<Long, Collection<Long>> cs2gene = CommonQueries.getCs2GeneIdMap(genesToSearch, EntityUtils.getIds(arrays), this.getSessionFactory().getCurrentSession());
if (cs2gene.size() == 0) {
if (results.isEmpty()) {
AbstractDao.log.warn("No composite sequences found for genes");
return new HashSet<>();
}
// No probes for the uncached genes, but we still have cache hits to return.
return results;
}
/*
 * Fill in the map, because we want to track information on the specificity of the probes used in the data
 * vectors.
 */
cs2gene = CommonQueries.getCs2GeneMapForProbes(cs2gene.keySet(), this.getSessionFactory().getCurrentSession());
Map<ProcessedExpressionDataVector, Collection<Long>> processedDataVectors = this.getProcessedVectors(EntityUtils.getIds(needToSearch), cs2gene);
Map<BioAssaySet, Collection<BioAssayDimension>> bioAssayDimensions = this.getBioAssayDimensions(needToSearch);
Collection<DoubleVectorValueObject> newResults = new HashSet<>();
/*
 * This loop is to ensure that we don't get misaligned vectors for experiments that use more than one array
 * design. See bug 1704. This isn't that common, so we try to break out as soon as possible.
 */
for (BioAssaySet bas : needToSearch) {
Collection<BioAssayDimension> dims = bioAssayDimensions.get(bas);
if (dims == null || dims.isEmpty()) {
AbstractDao.log.warn("BioAssayDimensions were null/empty unexpectedly.");
continue;
}
/*
 * Get the vectors for just this experiment. This is made more efficient by removing things from the map
 * each time through.
 */
Map<ProcessedExpressionDataVector, Collection<Long>> vecsForBas = new HashMap<>();
if (needToSearch.size() == 1) {
// Single experiment: all fetched vectors belong to it, skip the partitioning pass.
vecsForBas = processedDataVectors;
} else {
// isolate the vectors for the current experiment.
for (Iterator<ProcessedExpressionDataVector> it = processedDataVectors.keySet().iterator(); it.hasNext(); ) {
ProcessedExpressionDataVector v = it.next();
if (v.getExpressionExperiment().equals(bas)) {
vecsForBas.put(v, processedDataVectors.get(v));
// since we're done with it.
it.remove();
}
}
}
/*
 * Now see if anything is 'ragged' (fewer bioassays per biomaterial than in some other vector)
 */
if (dims.size() == 1) {
newResults.addAll(this.unpack(vecsForBas));
} else {
// Multiple dimensions: pad shorter vectors out to the longest dimension so they align.
BioAssayDimension longestBad = this.checkRagged(dims);
if (longestBad == null) {
newResults.addAll(this.unpack(vecsForBas));
} else {
newResults.addAll(this.unpack(vecsForBas, longestBad));
}
}
}
if (!newResults.isEmpty()) {
// Cache the full vectors before slicing, then slice to any subsets requested.
this.cacheResults(newResults);
newResults = this.sliceSubsets(ees, newResults);
results.addAll(newResults);
}
return results;
}
Aggregations