Search in sources :

Example 96 with ExpressionExperiment

use of ubic.gemma.model.expression.experiment.ExpressionExperiment in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorDaoImpl method checkCache.

/**
 * We cache vectors at the experiment level. If we need subsets, we have to slice them out.
 * <p>
 * For every bioassay set and every gene the cache is consulted. Hits are added to {@code results} (sliced down to
 * the subset when the bioassay set is an {@link ExpressionExperimentSubSet}); misses are recorded in
 * {@code genesToSearch}. An experiment is added to {@code needToSearch} only when at least one of the requested
 * genes was missing from the cache for that particular bioassay set.
 *
 * @param bioAssaySets  that we exactly need the data for.
 * @param genes         that might have cached results
 * @param results       from the cache will be put here
 * @param needToSearch  experiments that need to be searched (not fully cached); this will be populated
 * @param genesToSearch that still need to be searched (not in cache); this will be populated
 */
private void checkCache(Collection<? extends BioAssaySet> bioAssaySets, Collection<Long> genes, Collection<DoubleVectorValueObject> results, Collection<ExpressionExperiment> needToSearch, Collection<Long> genesToSearch) {
    for (BioAssaySet ee : bioAssaySets) {
        ExpressionExperiment experiment = null;
        boolean needSubSet = false;
        if (ee instanceof ExpressionExperiment) {
            experiment = (ExpressionExperiment) ee;
        } else if (ee instanceof ExpressionExperimentSubSet) {
            experiment = ((ExpressionExperimentSubSet) ee).getSourceExperiment();
            needSubSet = true;
        }
        assert experiment != null;

        // Tracked per bioassay set: genesToSearch accumulates across the whole outer loop, so its size alone
        // cannot tell us whether *this* experiment had a cache miss.
        boolean fullyCached = true;
        for (Long g : genes) {
            Collection<DoubleVectorValueObject> obs = processedDataVectorCache.get(ee, g);
            if (obs == null) {
                genesToSearch.add(g);
                fullyCached = false;
                continue;
            }
            if (needSubSet) {
                obs = this.sliceSubSet((ExpressionExperimentSubSet) ee, obs);
            }
            results.addAll(obs);
        }

        /*
         * This experiment is not fully cached for the genes in question.
         */
        if (!fullyCached) {
            needToSearch.add(experiment);
        }
    }
}
Also used : BioAssaySet(ubic.gemma.model.expression.experiment.BioAssaySet) ExpressionExperimentSubSet(ubic.gemma.model.expression.experiment.ExpressionExperimentSubSet) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment)

Example 97 with ExpressionExperiment

use of ubic.gemma.model.expression.experiment.ExpressionExperiment in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorDaoImpl method getProcessedDataArraysByProbeIds.

/**
 * Fetches processed data vectors for the given probes in the given bioassay sets, consulting the cache first and
 * querying for whatever is missing.
 * <p>
 * Probes are split into those that map to at least one gene and those that do not. The genes of the former are
 * used to check the cache; the latter are always queried because the cache is keyed on gene. Newly retrieved
 * vectors are unpacked, cached, sliced for subsets and added to the returned collection.
 *
 * @param ees      the experiments or experiment subsets to get data for
 * @param probeIds ids of the probes of interest
 * @return the vectors found, from the cache and from the query combined
 */
private Collection<DoubleVectorValueObject> getProcessedDataArraysByProbeIds(Collection<? extends BioAssaySet> ees, Collection<Long> probeIds) {
    Map<Long, Collection<Long>> cs2gene = CommonQueries.getCs2GeneMapForProbes(probeIds, this.getSessionFactory().getCurrentSession());

    // Move probes that have no gene out of cs2gene and into their own map.
    Map<Long, Collection<Long>> noGeneProbes = new HashMap<>();
    for (Long probeId : probeIds) {
        if (cs2gene.containsKey(probeId) && !cs2gene.get(probeId).isEmpty()) {
            continue;
        }
        noGeneProbes.put(probeId, new HashSet<Long>());
        cs2gene.remove(probeId);
    }
    AbstractDao.log.info(cs2gene.size() + " probes associated with a gene; " + noGeneProbes.size() + " not");

    /*
     * To check the cache we need the list of genes first. Get it from the CS2Gene map, then check the cache.
     */
    Collection<Long> genes = new HashSet<>();
    for (Collection<Long> geneIds : cs2gene.values()) {
        genes.addAll(geneIds);
    }

    Collection<DoubleVectorValueObject> results = new HashSet<>();
    Collection<ExpressionExperiment> needToSearch = new HashSet<>();
    Collection<Long> genesToSearch = new HashSet<>();
    this.checkCache(ees, genes, results, needToSearch, genesToSearch);
    if (!results.isEmpty()) {
        AbstractDao.log.info(results.size() + " vectors fetched from cache");
    }

    Map<ProcessedExpressionDataVector, Collection<Long>> rawResults = new HashMap<>();

    /*
     * Small problem: noGeneProbes are never really cached since we use the gene as part of that.
     */
    if (!noGeneProbes.isEmpty()) {
        Collection<ExpressionExperiment> eesForNoGeneProbes = new HashSet<>();
        for (BioAssaySet ee : ees) {
            ExpressionExperiment source = ee instanceof ExpressionExperiment ? (ExpressionExperiment) ee : ((ExpressionExperimentSubSet) ee).getSourceExperiment();
            eesForNoGeneProbes.add(source);
        }
        needToSearch.addAll(eesForNoGeneProbes);
        rawResults.putAll(this.getProcessedVectors(EntityUtils.getIds(eesForNoGeneProbes), noGeneProbes));
        // rawResults can only be non-empty at this point if this branch populated it.
        if (!rawResults.isEmpty()) {
            AbstractDao.log.info(rawResults.size() + " vectors retrieved so far, for noGeneProbes");
        }
    }

    /*
     * Non-cached items.
     */
    if (!needToSearch.isEmpty()) {
        rawResults.putAll(this.getProcessedVectors(EntityUtils.getIds(needToSearch), cs2gene));
    }
    if (!rawResults.isEmpty()) {
        AbstractDao.log.info(rawResults.size() + " vectors retrieved so far, after fetching non-cached.");
    }

    /*
     * Deal with possibility of 'gaps' and unpack the vectors.
     */
    // NOTE(review): every iteration unpacks the complete rawResults map, not just this experiment's vectors, and
    // the accumulator carries over between iterations - confirm this is intended.
    Collection<DoubleVectorValueObject> unpacked = new HashSet<>();
    for (ExpressionExperiment experiment : needToSearch) {
        Collection<BioAssayDimension> dimensions = this.getBioAssayDimensions(experiment);
        if (dimensions.size() != 1) {
            /*
             * See handleGetProcessedExpressionDataArrays(Collection<? extends BioAssaySet>, Collection<Gene>,
             * boolean) and bug 1704.
             */
            BioAssayDimension longestBad = this.checkRagged(dimensions);
            assert longestBad != null;
            unpacked.addAll(this.unpack(rawResults, longestBad));
        } else {
            unpacked.addAll(this.unpack(rawResults));
        }
        if (unpacked.isEmpty()) {
            continue;
        }
        this.cacheResults(unpacked);
        unpacked = this.sliceSubsets(ees, unpacked);
        results.addAll(unpacked);
    }
    return results;
}
Also used : ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) BioAssaySet(ubic.gemma.model.expression.experiment.BioAssaySet)

Example 98 with ExpressionExperiment

use of ubic.gemma.model.expression.experiment.ExpressionExperiment in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorDaoImpl method createProcessedDataVectors.

/**
 * Rebuilds the processed expression data vectors of an experiment from its preferred raw data vectors.
 * <p>
 * The experiment is loaded from the current session by id, its existing processed vectors are removed, and the
 * preferred raw vectors are masked (with missing-value vectors when the experiment is two-channel) and unpacked.
 * The data are renormalized unless the quantitation type is a ratio or there are too few vectors. One processed
 * vector is then created per design element, the experiment is updated and its cache entry is cleared.
 *
 * @param ee the experiment to process; its id is used to load the instance that gets updated
 * @return the experiment instance loaded from the session, with the new processed vectors attached
 * @throws IllegalStateException    if {@code ee} is null or a design element is encountered twice
 * @throws IllegalArgumentException if the experiment has no preferred data vectors
 */
@Override
public ExpressionExperiment createProcessedDataVectors(ExpressionExperiment ee) {
    if (ee == null) {
        throw new IllegalStateException("ExpressionExperiment cannot be null");
    }
    ExpressionExperiment expressionExperiment = (ExpressionExperiment) this.getSessionFactory().getCurrentSession().get(ExpressionExperiment.class, ee.getId());
    assert expressionExperiment != null;

    // Drop whatever processed vectors exist, then make sure the associations we touch are loaded.
    this.removeProcessedDataVectors(expressionExperiment);
    Hibernate.initialize(expressionExperiment);
    Hibernate.initialize(expressionExperiment.getQuantitationTypes());
    Hibernate.initialize(expressionExperiment.getProcessedExpressionDataVectors());
    expressionExperiment.getProcessedExpressionDataVectors().clear();

    AbstractDao.log.info("Computing processed expression vectors for " + expressionExperiment);

    // Missing-value vectors are only looked up for two-channel experiments.
    Collection<RawExpressionDataVector> missingValueVectors;
    if (this.isTwoChannel(expressionExperiment)) {
        missingValueVectors = this.getMissingValueVectors(expressionExperiment);
    } else {
        missingValueVectors = new HashSet<>();
    }

    Collection<RawExpressionDataVector> preferredDataVectors = this.getPreferredDataVectors(expressionExperiment);
    if (preferredDataVectors.isEmpty()) {
        throw new IllegalArgumentException("No preferred data vectors for " + expressionExperiment);
    }
    Map<CompositeSequence, DoubleVectorValueObject> maskedVectorObjects = this.maskAndUnpack(preferredDataVectors, missingValueVectors);

    // The quantitation type of the new vectors is derived from that of an arbitrary preferred vector.
    RawExpressionDataVector preferredDataVectorExemplar = preferredDataVectors.iterator().next();
    QuantitationType preferredMaskedDataQuantitationType = this.getPreferredMaskedDataQuantitationType(preferredDataVectorExemplar.getQuantitationType());

    /*
     * Note that we used to not normalize count data, but we've removed this restriction; and in any case we have
     * moved to using non-count summaries for the primary data type.
     */
    if (preferredMaskedDataQuantitationType.getType().equals(StandardQuantitationType.COUNT)) {
        /*
         * Backfill target
         */
        AbstractDao.log.warn("Preferred data are counts; please convert to log2cpm");
    }

    if (preferredMaskedDataQuantitationType.getIsRatio() || maskedVectorObjects.size() <= ProcessedExpressionDataVectorDaoImpl.MIN_SIZE_FOR_RENORMALIZATION) {
        AbstractDao.log.info("Normalization skipped for this data set (not suitable)");
    } else {
        AbstractDao.log.info("Normalizing the data");
        this.renormalize(maskedVectorObjects);
    }

    /*
     * Create the vectors. Do a sanity check that we don't have more than we should
     */
    Collection<CompositeSequence> seenDes = new HashSet<>();
    int built = 0;
    for (CompositeSequence cs : maskedVectorObjects.keySet()) {
        DoubleVectorValueObject dvvo = maskedVectorObjects.get(cs);
        // defensive programming, this happens. add() returns false when the element was already seen.
        if (!seenDes.add(cs)) {
            throw new IllegalStateException("Duplicated design element: " + cs + "; make sure the experiment has only one 'preferred' quantitation type. " + "Perhaps you need to run vector merging following an array design switch?");
        }
        ProcessedExpressionDataVector vec = (ProcessedExpressionDataVector) dvvo.toDesignElementDataVector(ee, cs, preferredMaskedDataQuantitationType);
        expressionExperiment.getProcessedExpressionDataVectors().add(vec);
        built++;
        if (built % 5000 == 0) {
            AbstractDao.log.info(built + " vectors built");
        }
    }

    int vectorCount = expressionExperiment.getProcessedExpressionDataVectors().size();
    AbstractDao.log.info("Persisting " + vectorCount + " processed data vectors");
    expressionExperiment.getQuantitationTypes().add(preferredMaskedDataQuantitationType);
    expressionExperiment.setNumberOfDataVectors(vectorCount);
    this.getSessionFactory().getCurrentSession().update(expressionExperiment);
    assert expressionExperiment.getNumberOfDataVectors() != null;

    // The cache entry for this experiment is cleared now that its vectors have been replaced.
    this.processedDataVectorCache.clearCache(expressionExperiment.getId());
    return expressionExperiment;
}
Also used : ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) StandardQuantitationType(ubic.gemma.model.common.quantitationtype.StandardQuantitationType) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Example 99 with ExpressionExperiment

use of ubic.gemma.model.expression.experiment.ExpressionExperiment in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorDaoImpl method createProcessedDataVectors.

/**
 * Replaces the processed data vectors of an experiment with the supplied ones.
 * <p>
 * The experiment is loaded from the current session by id, its existing processed vectors are removed, the given
 * vectors are attached, and the quantitation type of the first vector is saved and added to the experiment. It is
 * assumed that all vectors share that quantitation type. The cache entry for the experiment is cleared afterwards.
 *
 * @param ee   the experiment to update; its id is used to load the instance that gets modified
 * @param data the processed vectors to attach; must contain at least one vector
 * @return the experiment instance loaded from the session
 * @throws IllegalStateException    if {@code ee} is null
 * @throws IllegalArgumentException if {@code data} is null or empty
 */
@Override
public ExpressionExperiment createProcessedDataVectors(ExpressionExperiment ee, Collection<ProcessedExpressionDataVector> data) {
    if (ee == null) {
        throw new IllegalStateException("ExpressionExperiment cannot be null");
    }
    // Validate before anything is removed: the quantitation type is read from the first vector, so an empty
    // collection would otherwise fail only after the existing vectors had already been dropped.
    if (data == null || data.isEmpty()) {
        throw new IllegalArgumentException("No processed data vectors were provided for " + ee);
    }
    ExpressionExperiment expressionExperiment = (ExpressionExperiment) this.getSessionFactory().getCurrentSession().get(ExpressionExperiment.class, ee.getId());
    assert expressionExperiment != null;
    this.removeProcessedDataVectors(expressionExperiment);
    Hibernate.initialize(expressionExperiment);
    Hibernate.initialize(expressionExperiment.getQuantitationTypes());
    expressionExperiment.setProcessedExpressionDataVectors(data);
    // assumes all are same.
    QuantitationType qt = data.iterator().next().getQuantitationType();
    this.getSessionFactory().getCurrentSession().saveOrUpdate(qt);
    expressionExperiment.getQuantitationTypes().add(qt);
    expressionExperiment.setNumberOfDataVectors(expressionExperiment.getProcessedExpressionDataVectors().size());
    this.getSessionFactory().getCurrentSession().update(expressionExperiment);
    assert expressionExperiment.getNumberOfDataVectors() != null;
    this.processedDataVectorCache.clearCache(expressionExperiment.getId());
    return expressionExperiment;
}
Also used : StandardQuantitationType(ubic.gemma.model.common.quantitationtype.StandardQuantitationType) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment)

Example 100 with ExpressionExperiment

use of ubic.gemma.model.expression.experiment.ExpressionExperiment in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorDaoImpl method getBioAssayDimensions.

/**
 * Retrieves the distinct bioassay dimensions that share bioassays with an experiment.
 * <p>
 * When given something other than an {@link ExpressionExperiment} (i.e. a subset), the lookup is delegated to the
 * experiment returned by {@code getExperiment}. Queries that take longer than 100 ms are logged.
 *
 * @param ee the experiment or experiment subset
 * @return the bioassay dimensions returned by the query
 */
private Collection<BioAssayDimension> getBioAssayDimensions(BioAssaySet ee) {
    if (!(ee instanceof ExpressionExperiment)) {
        // subset.
        return this.getBioAssayDimensions(this.getExperiment(ee));
    }

    StopWatch timer = new StopWatch();
    timer.start();
    // this does not look efficient.
    @SuppressWarnings("unchecked")
    List<BioAssayDimension> dimensions = (List<BioAssayDimension>) this.getSessionFactory().getCurrentSession()
            .createQuery("select distinct bad from ExpressionExperiment e, BioAssayDimension bad" + " inner join e.bioAssays b inner join bad.bioAssays badba where e = :ee and b in (badba) ")
            .setParameter("ee", ee)
            .list();
    timer.stop();

    long elapsedMs = timer.getTime();
    if (elapsedMs > 100) {
        AbstractDao.log.info("Fetch " + dimensions.size() + " bioassayDimensions for experiment id=" + ee.getId() + ": " + elapsedMs + "ms");
    }
    return dimensions;
}
Also used : ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) StopWatch(org.apache.commons.lang3.time.StopWatch)

Aggregations

ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)214 Test (org.junit.Test)71 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)42 InputStream (java.io.InputStream)36 GeoSeries (ubic.gemma.core.loader.expression.geo.model.GeoSeries)29 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)29 GZIPInputStream (java.util.zip.GZIPInputStream)28 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)26 HashSet (java.util.HashSet)25 BioAssaySet (ubic.gemma.model.expression.experiment.BioAssaySet)25 GeoDomainObjectGeneratorLocal (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal)23 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)22 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)20 Collection (java.util.Collection)18 StopWatch (org.apache.commons.lang3.time.StopWatch)18 ExperimentalFactor (ubic.gemma.model.expression.experiment.ExperimentalFactor)18 Taxon (ubic.gemma.model.genome.Taxon)14 Before (org.junit.Before)12 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)12 RequestMapping (org.springframework.web.bind.annotation.RequestMapping)11