Search in sources :

Example 91 with ExpressionExperiment

use of ubic.gemma.model.expression.experiment.ExpressionExperiment in project Gemma by PavlidisLab.

the class GeoBrowserServiceImpl method filterGeoRecords.

/**
 * Filters the given GEO records in place and annotates the ones that are kept.
 * <p>
 * A record is removed when it has fewer than {@code GeoBrowserServiceImpl.MIN_SAMPLES} samples, or when any of
 * its organisms cannot be resolved by {@code taxonService} (common name first, then scientific name). Records
 * with no organisms are kept without further annotation.
 * <p>
 * For every organism of a kept record, an accession is built from the record's GEO accession (suffixed with
 * {@code "." + index} when the record has more than one organism). The IDs of experiments found for that
 * accession are added to the record's corresponding experiments, and the record's previous-clicks and usable
 * flags are set from {@code localInfo} (0 clicks and usable when there is no entry for the accession). With
 * several organisms, the values from the last organism win.
 *
 * @param records records to filter; this list is modified
 * @return the same list instance, with the rejected records removed
 */
private List<GeoRecord> filterGeoRecords(List<GeoRecord> records) {
    ExternalDatabase geo = externalDatabaseService.findByName("GEO");
    assert geo != null;
    Collection<GeoRecord> toRemove = new HashSet<>();
    rec: for (GeoRecord record : records) {
        if (record.getNumSamples() < GeoBrowserServiceImpl.MIN_SAMPLES) {
            toRemove.add(record);
            // The record is discarded below, so skip the taxon and experiment lookups for it.
            continue;
        }
        Collection<String> organisms = record.getOrganisms();
        if (organisms == null || organisms.isEmpty()) {
            continue;
        }
        boolean multipleOrganisms = organisms.size() > 1;
        int i = 0;
        for (String organism : organisms) {
            Taxon t = taxonService.findByCommonName(organism);
            if (t == null) {
                t = taxonService.findByScientificName(organism);
                if (t == null) {
                    // Unknown taxon: reject the whole record and move on to the next one.
                    toRemove.add(record);
                    continue rec;
                }
            }
            String acc = record.getGeoAccession();
            if (multipleOrganisms) {
                acc = acc + "." + i;
            }
            DatabaseEntry de = DatabaseEntry.Factory.newInstance();
            de.setExternalDatabase(geo);
            de.setAccession(acc);
            for (ExpressionExperiment expressionExperiment : expressionExperimentService.findByAccession(de)) {
                record.getCorrespondingExperiments().add(expressionExperiment.getId());
            }
            boolean known = localInfo.containsKey(acc);
            record.setPreviousClicks(known ? localInfo.get(acc).getPreviousClicks() : 0);
            record.setUsable(!known || localInfo.get(acc).isUsable());
            i++;
        }
    }
    records.removeAll(toRemove);
    return records;
}
Also used : GeoRecord(ubic.gemma.core.loader.expression.geo.model.GeoRecord) ExternalDatabase(ubic.gemma.model.common.description.ExternalDatabase) Taxon(ubic.gemma.model.genome.Taxon) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment)

Example 92 with ExpressionExperiment

use of ubic.gemma.model.expression.experiment.ExpressionExperiment in project Gemma by PavlidisLab.

the class GeoServiceImpl method updateReports.

/**
 * Refreshes the reports for the given entities.
 * <p>
 * For each {@link ExpressionExperiment}, a summary is generated and the array designs used by its bioassays are
 * collected. Each {@link ArrayDesign} in the input is collected directly. Entities of any other type are
 * ignored. Finally, a report is generated once for every distinct collected array design.
 *
 * @param entities entities to process; may mix experiments, array designs and other objects
 */
private void updateReports(Collection<?> entities) {
    Collection<ArrayDesign> platforms = new HashSet<>();
    for (Object candidate : entities) {
        if (!(candidate instanceof ExpressionExperiment)) {
            if (candidate instanceof ArrayDesign) {
                platforms.add((ArrayDesign) candidate);
            }
            continue;
        }
        ExpressionExperiment thawed = this.expressionExperimentService.thaw((ExpressionExperiment) candidate);
        this.expressionExperimentReportService.generateSummary(thawed.getId());
        // The experiment is thawed a second time after the summary is generated, before its bioassays are read.
        thawed = this.expressionExperimentService.thaw(thawed);
        for (BioAssay assay : thawed.getBioAssays()) {
            platforms.add(assay.getArrayDesignUsed());
        }
    }
    for (ArrayDesign platform : platforms) {
        this.arrayDesignReportService.generateArrayDesignReport(platform.getId());
    }
}
Also used : ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 93 with ExpressionExperiment

use of ubic.gemma.model.expression.experiment.ExpressionExperiment in project Gemma by PavlidisLab.

the class GeoServiceImpl method getPubMedInfo.

/**
 * Replaces the primary publication of each experiment with the reference fetched from PubMed.
 * <p>
 * Experiments without a primary publication are skipped. The existing publication's accession is parsed as an
 * integer PubMed ID and fetched over HTTP. If the fetch fails for any reason (including a missing or
 * non-numeric accession) or returns nothing, the failure is logged and the experiment keeps its existing
 * publication.
 *
 * @param result experiments whose primary publications should be refreshed; modified in place
 */
private void getPubMedInfo(Collection<ExpressionExperiment> result) {
    for (ExpressionExperiment experiment : result) {
        BibliographicReference existing = experiment.getPrimaryPublication();
        if (existing == null) {
            continue;
        }
        PubMedXMLFetcher fetcher = new PubMedXMLFetcher();
        BibliographicReference fetched = null;
        try {
            fetched = fetcher.retrieveByHTTP(Integer.parseInt(existing.getPubAccession().getAccession()));
        } catch (Exception e) {
            // Best effort: a PubMed failure must not abort loading of the experiment.
            AbstractGeoService.log.warn("Failed to get data from pubmed, continuing without it.");
            AbstractGeoService.log.error(e, e);
        }
        if (fetched != null) {
            experiment.setPrimaryPublication(fetched);
        }
    }
}
Also used : PubMedXMLFetcher(ubic.gemma.core.loader.entrez.pubmed.PubMedXMLFetcher) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) BibliographicReference(ubic.gemma.model.common.description.BibliographicReference) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException)

Example 94 with ExpressionExperiment

use of ubic.gemma.model.expression.experiment.ExpressionExperiment in project Gemma by PavlidisLab.

the class GeoServiceImpl method fetchAndLoad.

/**
 * Given a GEO GSE or GDS (or GPL, but support might not be complete)
 * <ol>
 * <li>Check that it doesn't already exist in the system</li>
 * <li>Download and parse GDS files and GSE file needed</li>
 * <li>Convert the GDS and GSE into a ExpressionExperiment (or just the ArrayDesigns)</li>
 * <li>Load the resulting ExpressionExperiment and/or ArrayDesigns into Gemma</li>
 * </ol>
 *
 * @param geoAccession           the GEO accession to load; an accession starting with "GPL" makes the generator
 *                               process platforms only
 * @param loadPlatformOnly       if true, only the platforms are generated, converted and persisted
 * @param doSampleMatching       whether to do sample matching; only honoured when {@code splitByPlatform} is
 *                               false
 * @param splitByPlatform        passed to the converter's split-by-platform setting
 * @param allowSuperSeriesImport if false, a series flagged as a superseries is rejected
 * @param allowSubSeriesImport   if false, a series flagged as a subseries is rejected
 * @return the persisted platforms (when {@code loadPlatformOnly} is true) or the persisted experiments;
 *         {@code null} when GEO parsing produced no results
 * @throws IllegalStateException if a superseries or subseries is detected and the matching flag is false
 * @throws RuntimeException      if the first parsed object is not a {@link GeoSeries}; the helper checks called
 *                               here (e.g. {@code checkForExisting}) presumably throw as well, but that is not
 *                               visible from this method
 */
@Override
public Collection<?> fetchAndLoad(String geoAccession, boolean loadPlatformOnly, boolean doSampleMatching, boolean splitByPlatform, boolean allowSuperSeriesImport, boolean allowSubSeriesImport) {
    /*
     * We do this to get a fresh instantiation of GeoConverter (prototype scope)
     */
    GeoConverter geoConverter = (GeoConverter) this.beanFactory.getBean("geoConverter");
    if (this.geoDomainObjectGenerator == null) {
        this.geoDomainObjectGenerator = new GeoDomainObjectGenerator();
    } else {
        this.geoDomainObjectGenerator.initialize();
    }
    geoDomainObjectGenerator.setProcessPlatformsOnly(geoAccession.startsWith("GPL") || loadPlatformOnly);
    geoDomainObjectGenerator.setDoSampleMatching(doSampleMatching && !splitByPlatform);
    Collection<DatabaseEntry> projectedAccessions = geoDomainObjectGenerator.getProjectedAccessions(geoAccession);
    this.checkForExisting(projectedAccessions);
    if (loadPlatformOnly) {
        // Platform-only path: convert and persist the platforms, then stop.
        Collection<? extends GeoData> platforms = geoDomainObjectGenerator.generate(geoAccession);
        if (platforms.size() == 0) {
            AbstractGeoService.log.warn("Got no results");
            return null;
        }
        geoConverter.setForceConvertElements(true);
        Collection<Object> arrayDesigns = geoConverter.convert(platforms);
        return persisterHelper.persist(arrayDesigns);
    }
    Collection<? extends GeoData> parseResult = geoDomainObjectGenerator.generate(geoAccession);
    if (parseResult.size() == 0) {
        AbstractGeoService.log.warn("Got no results");
        return null;
    }
    AbstractGeoService.log.debug("Generated GEO domain objects for " + geoAccession);
    // Only the first parsed object is used, and it must be a series.
    Object obj = parseResult.iterator().next();
    if (!(obj instanceof GeoSeries)) {
        throw new RuntimeException("Got a " + obj.getClass().getName() + " instead of a " + GeoSeries.class.getName() + " (you may need to load platforms only).");
    }
    GeoSeries series = (GeoSeries) obj;
    String seriesAccession = series.getGeoAccession();
    if (series.isSuperSeries()) {
        if (allowSuperSeriesImport) {
            AbstractGeoService.log.info(" ========= SuperSeries Detected! =========");
            AbstractGeoService.log.info("Please make sure you want to import this as a superseries and not the individual subseries");
        } else {
            throw new IllegalStateException("SuperSeries detected, set 'allowSuperSeriesImport' to 'true' to allow this dataset to load");
        }
    }
    if (series.isSubSeries()) {
        if (allowSubSeriesImport) {
            AbstractGeoService.log.info(" ========= Subseries Detected! =========");
            AbstractGeoService.log.info("Please make sure you want to import this as a subseries and not the superseries");
        } else {
            throw new IllegalStateException("SubSeries detected, set 'allowSubSeriesImport' to 'true' to allow this dataset to load");
        }
    }
    this.confirmPlatformUniqueness(series, doSampleMatching && !splitByPlatform);
    ArrayDesignsForExperimentCache c = new ArrayDesignsForExperimentCache();
    this.matchToExistingPlatforms(geoConverter, series, c);
    this.checkSamplesAreNew(series);
    this.getSubSeriesInformation(series);
    geoConverter.clear();
    geoConverter.setSplitByPlatform(splitByPlatform);
    // noinspection unchecked
    Collection<ExpressionExperiment> result = (Collection<ExpressionExperiment>) geoConverter.convert(series);
    this.check(result);
    this.getPubMedInfo(result);
    AbstractGeoService.log.debug("Converted " + seriesAccession);
    assert persisterHelper != null;
    Collection<ExpressionExperiment> persistedResult = new HashSet<>();
    for (ExpressionExperiment ee : result) {
        // The cache returned by prepare() is carried over to the next experiment.
        c = expressionExperimentPrePersistService.prepare(ee, c);
        ee = persisterHelper.persist(ee, c);
        persistedResult.add(ee);
        AbstractGeoService.log.debug("Persisted " + seriesAccession);
    }
    this.updateReports(persistedResult);
    return persistedResult;
}
Also used : DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) ArrayDesignsForExperimentCache(ubic.gemma.persistence.util.ArrayDesignsForExperimentCache) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) GeoDomainObjectGenerator(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator) GeoConverter(ubic.gemma.core.loader.expression.geo.GeoConverter)

Example 95 with ExpressionExperiment

use of ubic.gemma.model.expression.experiment.ExpressionExperiment in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorDaoImpl method handleGetProcessedExpressionDataArrays.

/**
 * This is an important method for fetching vectors. Cached vectors are used where available; the remainder are
 * looked up, unpacked per experiment, cached, sliced and merged into the result.
 *
 * @param ees   experiments (or subsets) to fetch vectors for; possibly these must be thawed first as currently
 *              implemented
 * @param genes IDs of the genes of interest
 * @return vectors, possibly subsetted.
 */
private Collection<DoubleVectorValueObject> handleGetProcessedExpressionDataArrays(Collection<? extends BioAssaySet> ees, Collection<Long> genes) {
    Collection<DoubleVectorValueObject> results = new HashSet<>();

    // Step 1: take whatever the cache already has; checkCache fills in what is still missing.
    Collection<ExpressionExperiment> needToSearch = new HashSet<>();
    Collection<Long> genesToSearch = new HashSet<>();
    this.checkCache(ees, genes, results, needToSearch, genesToSearch);
    AbstractDao.log.info("Using " + results.size() + " DoubleVectorValueObject(s) from cache");
    if (needToSearch.isEmpty()) {
        return results;
    }

    // Step 2: look up everything that was not cached.
    AbstractDao.log.info("Searching for vectors for " + genes.size() + " genes from " + needToSearch.size() + " experiments not in cache");
    Collection<ArrayDesign> arrays = CommonQueries.getArrayDesignsUsed(EntityUtils.getIds(this.getExperiments(ees)), this.getSessionFactory().getCurrentSession()).keySet();
    assert !arrays.isEmpty();
    Map<Long, Collection<Long>> cs2gene = CommonQueries.getCs2GeneIdMap(genesToSearch, EntityUtils.getIds(arrays), this.getSessionFactory().getCurrentSession());
    if (cs2gene.isEmpty()) {
        if (!results.isEmpty()) {
            return results;
        }
        AbstractDao.log.warn("No composite sequences found for genes");
        return new HashSet<>();
    }

    // Re-fetch the map for the probes found, because we want to track information on the specificity of the
    // probes used in the data vectors.
    cs2gene = CommonQueries.getCs2GeneMapForProbes(cs2gene.keySet(), this.getSessionFactory().getCurrentSession());
    Map<ProcessedExpressionDataVector, Collection<Long>> processedDataVectors = this.getProcessedVectors(EntityUtils.getIds(needToSearch), cs2gene);
    Map<BioAssaySet, Collection<BioAssayDimension>> bioAssayDimensions = this.getBioAssayDimensions(needToSearch);
    Collection<DoubleVectorValueObject> newResults = new HashSet<>();

    // Step 3: unpack per experiment, so that vectors do not end up misaligned for experiments that use more
    // than one array design (see bug 1704). This isn't that common, so we try to break out as soon as possible.
    boolean singleExperiment = needToSearch.size() == 1;
    for (BioAssaySet bas : needToSearch) {
        Collection<BioAssayDimension> dims = bioAssayDimensions.get(bas);
        if (dims == null || dims.isEmpty()) {
            AbstractDao.log.warn("BioAssayDimensions were null/empty unexpectedly.");
            continue;
        }

        // Get the vectors for just this experiment. Matched entries are removed from the shared map so that
        // later iterations have less to scan.
        Map<ProcessedExpressionDataVector, Collection<Long>> vectorsForExperiment;
        if (singleExperiment) {
            vectorsForExperiment = processedDataVectors;
        } else {
            vectorsForExperiment = new HashMap<>();
            Iterator<Map.Entry<ProcessedExpressionDataVector, Collection<Long>>> entries = processedDataVectors.entrySet().iterator();
            while (entries.hasNext()) {
                Map.Entry<ProcessedExpressionDataVector, Collection<Long>> entry = entries.next();
                if (entry.getKey().getExpressionExperiment().equals(bas)) {
                    vectorsForExperiment.put(entry.getKey(), entry.getValue());
                    entries.remove();
                }
            }
        }

        // Now see if anything is 'ragged' (fewer bioassays per biomaterial than in some other vector). With a
        // single dimension there is nothing to check.
        BioAssayDimension longestBad = dims.size() == 1 ? null : this.checkRagged(dims);
        if (longestBad == null) {
            newResults.addAll(this.unpack(vectorsForExperiment));
        } else {
            newResults.addAll(this.unpack(vectorsForExperiment, longestBad));
        }
    }

    if (newResults.isEmpty()) {
        return results;
    }
    this.cacheResults(newResults);
    results.addAll(this.sliceSubsets(ees, newResults));
    return results;
}
Also used : ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) BioAssaySet(ubic.gemma.model.expression.experiment.BioAssaySet)

Aggregations

ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)214 Test (org.junit.Test)71 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)42 InputStream (java.io.InputStream)36 GeoSeries (ubic.gemma.core.loader.expression.geo.model.GeoSeries)29 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)29 GZIPInputStream (java.util.zip.GZIPInputStream)28 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)26 HashSet (java.util.HashSet)25 BioAssaySet (ubic.gemma.model.expression.experiment.BioAssaySet)25 GeoDomainObjectGeneratorLocal (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal)23 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)22 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)20 Collection (java.util.Collection)18 StopWatch (org.apache.commons.lang3.time.StopWatch)18 ExperimentalFactor (ubic.gemma.model.expression.experiment.ExperimentalFactor)18 Taxon (ubic.gemma.model.genome.Taxon)14 Before (org.junit.Before)12 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)12 RequestMapping (org.springframework.web.bind.annotation.RequestMapping)11