use of ubic.gemma.model.expression.experiment.ExpressionExperiment in project Gemma by PavlidisLab.
the class ProcessedExpressionDataVectorDaoImpl method checkCache.
/**
 * Looks up cached vectors for every (data set, gene) pair and reports what still has to be fetched.
 * <p>
 * We cache vectors at the experiment level. If we need subsets, we have to slice them out: vectors found for an
 * {@link ExpressionExperimentSubSet} are passed through {@code sliceSubSet} before being added to the results.
 * <p>
 * An experiment is added to {@code needToSearch} only when at least one of the requested genes was missing from
 * the cache <em>for that data set</em>. Misses recorded for a previously visited data set do not cause a fully
 * cached experiment to be searched again, so the outcome does not depend on iteration order.
 *
 * @param bioAssaySets  that we exactly need the data for.
 * @param genes         that might have cached results
 * @param results       from the cache will be put here
 * @param needToSearch  experiments that need to be searched (not fully cached); this will be populated. For a
 *                      subset, the source experiment is what gets added.
 * @param genesToSearch that still need to be searched (not in cache); this will be populated. It is the union
 *                      of the misses over all data sets.
 */
private void checkCache(Collection<? extends BioAssaySet> bioAssaySets, Collection<Long> genes, Collection<DoubleVectorValueObject> results, Collection<ExpressionExperiment> needToSearch, Collection<Long> genesToSearch) {
    for (BioAssaySet ee : bioAssaySets) {
        ExpressionExperiment experiment = null;
        boolean needSubSet = false;
        if (ee instanceof ExpressionExperiment) {
            experiment = (ExpressionExperiment) ee;
        } else if (ee instanceof ExpressionExperimentSubSet) {
            experiment = ((ExpressionExperimentSubSet) ee).getSourceExperiment();
            needSubSet = true;
        }
        assert experiment != null;

        // Tracked per data set: genesToSearch is shared across the whole loop, so its size says nothing about
        // whether *this* experiment had a miss.
        boolean fullyCached = true;
        for (Long g : genes) {
            Collection<DoubleVectorValueObject> obs = processedDataVectorCache.get(ee, g);
            if (obs == null) {
                fullyCached = false;
                genesToSearch.add(g);
                continue;
            }
            if (needSubSet) {
                obs = this.sliceSubSet((ExpressionExperimentSubSet) ee, obs);
            }
            results.addAll(obs);
        }

        /*
         * This experiment is not fully cached for the genes in question.
         */
        if (!fullyCached) {
            needToSearch.add(experiment);
        }
    }
}
use of ubic.gemma.model.expression.experiment.ExpressionExperiment in project Gemma by PavlidisLab.
the class ProcessedExpressionDataVectorDaoImpl method getProcessedDataArraysByProbeIds.
/**
 * Retrieves processed data vectors for the given probes in the given data sets, using the vector cache where
 * possible and querying for whatever is missing.
 * <p>
 * Probes are first split into those mapped to at least one gene and those that are not. Gene-mapped probes are
 * checked against the cache via their genes; probes without genes are always queried.
 *
 * @param ees      data sets (experiments or subsets) to get data for
 * @param probeIds ids of the probes of interest
 * @return the vectors found, combining cached and freshly fetched ones
 */
private Collection<DoubleVectorValueObject> getProcessedDataArraysByProbeIds(Collection<? extends BioAssaySet> ees, Collection<Long> probeIds) {
    Collection<DoubleVectorValueObject> results = new HashSet<>();
    Map<Long, Collection<Long>> cs2gene = CommonQueries.getCs2GeneMapForProbes(probeIds, this.getSessionFactory().getCurrentSession());

    // Move probes that have no gene out of cs2gene and into their own map (with an empty gene collection).
    Map<Long, Collection<Long>> noGeneProbes = new HashMap<>();
    for (Long pid : probeIds) {
        Collection<Long> mappedGenes = cs2gene.get(pid);
        if (mappedGenes == null || mappedGenes.isEmpty()) {
            noGeneProbes.put(pid, new HashSet<Long>());
            cs2gene.remove(pid);
        }
    }
    AbstractDao.log.info(cs2gene.size() + " probes associated with a gene; " + noGeneProbes.size() + " not");

    /*
     * To Check the cache we need the list of genes 1st. Get from CS2Gene list then check the cache.
     */
    Collection<Long> genes = new HashSet<>();
    for (Collection<Long> geneIds : cs2gene.values()) {
        genes.addAll(geneIds);
    }
    Collection<ExpressionExperiment> needToSearch = new HashSet<>();
    Collection<Long> genesToSearch = new HashSet<>();
    this.checkCache(ees, genes, results, needToSearch, genesToSearch);
    if (!results.isEmpty()) {
        AbstractDao.log.info(results.size() + " vectors fetched from cache");
    }

    Map<ProcessedExpressionDataVector, Collection<Long>> rawResults = new HashMap<>();

    /*
     * Small problem: noGeneProbes are never really cached since we use the gene as part of that.
     */
    if (!noGeneProbes.isEmpty()) {
        Collection<ExpressionExperiment> eesForNoGeneProbes = new HashSet<>();
        for (BioAssaySet ee : ees) {
            if (ee instanceof ExpressionExperiment) {
                eesForNoGeneProbes.add((ExpressionExperiment) ee);
            } else {
                eesForNoGeneProbes.add(((ExpressionExperimentSubSet) ee).getSourceExperiment());
            }
        }
        // These experiments also take part in the unpacking loop below.
        needToSearch.addAll(eesForNoGeneProbes);
        rawResults.putAll(this.getProcessedVectors(EntityUtils.getIds(eesForNoGeneProbes), noGeneProbes));
    }
    if (!rawResults.isEmpty()) {
        AbstractDao.log.info(rawResults.size() + " vectors retrieved so far, for noGeneProbes");
    }

    /*
     * Non-cached items.
     */
    if (!needToSearch.isEmpty()) {
        rawResults.putAll(this.getProcessedVectors(EntityUtils.getIds(needToSearch), cs2gene));
    }
    if (!rawResults.isEmpty()) {
        AbstractDao.log.info(rawResults.size() + " vectors retrieved so far, after fetching non-cached.");
    }

    /*
     * Deal with possibility of 'gaps' and unpack the vectors.
     */
    for (ExpressionExperiment ee : needToSearch) {
        // Fresh collection per experiment: the sliced output of a previous iteration must not be handed to
        // cacheResults or sliced a second time.
        Collection<DoubleVectorValueObject> unpacked = new HashSet<>();
        Collection<BioAssayDimension> bioAssayDimensions = this.getBioAssayDimensions(ee);
        // NOTE(review): rawResults holds the vectors of every searched experiment, and all of them are unpacked
        // on each iteration (in the ragged case, against this experiment's longest dimension). Confirm whether
        // the raw results should be restricted to the current experiment here.
        if (bioAssayDimensions.size() == 1) {
            unpacked.addAll(this.unpack(rawResults));
        } else {
            /*
             * See handleGetProcessedExpressionDataArrays(Collection<? extends BioAssaySet>, Collection<Gene>,
             * boolean) and bug 1704.
             */
            BioAssayDimension longestBad = this.checkRagged(bioAssayDimensions);
            assert longestBad != null;
            unpacked.addAll(this.unpack(rawResults, longestBad));
        }
        if (!unpacked.isEmpty()) {
            // Cache the unsliced vectors, then slice for the data sets that were actually requested.
            this.cacheResults(unpacked);
            results.addAll(this.sliceSubsets(ees, unpacked));
        }
    }
    return results;
}
use of ubic.gemma.model.expression.experiment.ExpressionExperiment in project Gemma by PavlidisLab.
the class ProcessedExpressionDataVectorDaoImpl method createProcessedDataVectors.
/**
 * Replaces the processed data vectors of an experiment with ones computed from its preferred raw data vectors.
 * <p>
 * The experiment is reloaded from the current session, its existing processed vectors are removed, the preferred
 * vectors are masked (using missing-value vectors when the experiment is two-channel), optionally renormalized,
 * and the resulting vectors are attached to the reloaded experiment. The vector cache for the experiment is
 * cleared at the end.
 *
 * @param ee the experiment to process; only its id is used to reload it from the session
 * @return the session-loaded experiment, with its processed vectors, quantitation types and vector count updated
 * @throws IllegalStateException    if {@code ee} is null, or if a design element shows up more than once
 * @throws IllegalArgumentException if the experiment has no preferred data vectors
 */
@Override
public ExpressionExperiment createProcessedDataVectors(ExpressionExperiment ee) {
    if (ee == null) {
        throw new IllegalStateException("ExpressionExperiment cannot be null");
    }
    ExpressionExperiment expressionExperiment = (ExpressionExperiment) this.getSessionFactory().getCurrentSession().get(ExpressionExperiment.class, ee.getId());
    assert expressionExperiment != null;

    this.removeProcessedDataVectors(expressionExperiment);
    Hibernate.initialize(expressionExperiment);
    Hibernate.initialize(expressionExperiment.getQuantitationTypes());
    Hibernate.initialize(expressionExperiment.getProcessedExpressionDataVectors());
    expressionExperiment.getProcessedExpressionDataVectors().clear();

    AbstractDao.log.info("Computing processed expression vectors for " + expressionExperiment);

    // Missing-value vectors are only looked up for two-channel experiments.
    boolean isTwoChannel = this.isTwoChannel(expressionExperiment);
    Collection<RawExpressionDataVector> missingValueVectors = new HashSet<>();
    if (isTwoChannel) {
        missingValueVectors = this.getMissingValueVectors(expressionExperiment);
    }

    Collection<RawExpressionDataVector> preferredDataVectors = this.getPreferredDataVectors(expressionExperiment);
    if (preferredDataVectors.isEmpty()) {
        throw new IllegalArgumentException("No preferred data vectors for " + expressionExperiment);
    }

    Map<CompositeSequence, DoubleVectorValueObject> maskedVectorObjects = this.maskAndUnpack(preferredDataVectors, missingValueVectors);

    /*
     * Create the vectors. Do a sanity check that we don't have more than we should
     */
    Collection<CompositeSequence> seenDes = new HashSet<>();
    // The quantitation type of the new vectors is derived from an arbitrary preferred vector.
    RawExpressionDataVector preferredDataVectorExemplar = preferredDataVectors.iterator().next();
    QuantitationType preferredMaskedDataQuantitationType = this.getPreferredMaskedDataQuantitationType(preferredDataVectorExemplar.getQuantitationType());

    /*
     * Note that we used to not normalize count data, but we've removed this restriction; and in any case we have
     * moved to using non-count summaries for the primary data type.
     */
    if (preferredMaskedDataQuantitationType.getType().equals(StandardQuantitationType.COUNT)) {
        /*
         * Backfill target
         */
        AbstractDao.log.warn("Preferred data are counts; please convert to log2cpm");
    }

    // Ratio data and small data sets are not renormalized.
    if (!preferredMaskedDataQuantitationType.getIsRatio() && maskedVectorObjects.size() > ProcessedExpressionDataVectorDaoImpl.MIN_SIZE_FOR_RENORMALIZATION) {
        AbstractDao.log.info("Normalizing the data");
        this.renormalize(maskedVectorObjects);
    } else {
        AbstractDao.log.info("Normalization skipped for this data set (not suitable)");
    }

    int i = 0;
    for (Map.Entry<CompositeSequence, DoubleVectorValueObject> entry : maskedVectorObjects.entrySet()) {
        CompositeSequence cs = entry.getKey();
        DoubleVectorValueObject dvvo = entry.getValue();
        if (!seenDes.add(cs)) {
            // defensive programming, this happens.
            throw new IllegalStateException("Duplicated design element: " + cs + "; make sure the experiment has only one 'preferred' quantitation type. " + "Perhaps you need to run vector merging following an array design switch?");
        }
        // Build the vector against the session-loaded experiment it is being attached to, not the instance the
        // caller passed in (which may be a different, detached object with the same id).
        ProcessedExpressionDataVector vec = (ProcessedExpressionDataVector) dvvo.toDesignElementDataVector(expressionExperiment, cs, preferredMaskedDataQuantitationType);
        expressionExperiment.getProcessedExpressionDataVectors().add(vec);
        if (++i % 5000 == 0) {
            AbstractDao.log.info(i + " vectors built");
        }
    }

    AbstractDao.log.info("Persisting " + expressionExperiment.getProcessedExpressionDataVectors().size() + " processed data vectors");
    expressionExperiment.getQuantitationTypes().add(preferredMaskedDataQuantitationType);
    expressionExperiment.setNumberOfDataVectors(expressionExperiment.getProcessedExpressionDataVectors().size());
    this.getSessionFactory().getCurrentSession().update(expressionExperiment);
    assert expressionExperiment.getNumberOfDataVectors() != null;

    // Anything cached for this experiment was built from the vectors that were just replaced.
    this.processedDataVectorCache.clearCache(expressionExperiment.getId());
    return expressionExperiment;
}
use of ubic.gemma.model.expression.experiment.ExpressionExperiment in project Gemma by PavlidisLab.
the class ProcessedExpressionDataVectorDaoImpl method createProcessedDataVectors.
/**
 * Replaces the processed data vectors of an experiment with the supplied ones.
 * <p>
 * The experiment is reloaded from the current session, its existing processed vectors are removed, and
 * {@code data} is installed in their place. The quantitation type of the first supplied vector is saved and added
 * to the experiment (all vectors are assumed to share it). The vector cache for the experiment is cleared at the
 * end.
 *
 * @param ee   the experiment to update; only its id is used to reload it from the session
 * @param data the processed vectors to attach; must contain at least one vector
 * @return the session-loaded experiment, with its processed vectors, quantitation types and vector count updated
 * @throws IllegalStateException    if {@code ee} is null
 * @throws IllegalArgumentException if {@code data} is null or empty
 */
@Override
public ExpressionExperiment createProcessedDataVectors(ExpressionExperiment ee, Collection<ProcessedExpressionDataVector> data) {
    if (ee == null) {
        throw new IllegalStateException("ExpressionExperiment cannot be null");
    }
    // Validate before touching the experiment: the quantitation type is taken from the first vector, so an empty
    // collection would otherwise only fail after the existing vectors had already been removed.
    if (data == null || data.isEmpty()) {
        throw new IllegalArgumentException("No processed data vectors were provided for " + ee);
    }

    ExpressionExperiment expressionExperiment = (ExpressionExperiment) this.getSessionFactory().getCurrentSession().get(ExpressionExperiment.class, ee.getId());
    assert expressionExperiment != null;

    this.removeProcessedDataVectors(expressionExperiment);
    Hibernate.initialize(expressionExperiment);
    Hibernate.initialize(expressionExperiment.getQuantitationTypes());
    expressionExperiment.setProcessedExpressionDataVectors(data);

    // assumes all are same.
    QuantitationType qt = data.iterator().next().getQuantitationType();
    this.getSessionFactory().getCurrentSession().saveOrUpdate(qt);
    expressionExperiment.getQuantitationTypes().add(qt);

    expressionExperiment.setNumberOfDataVectors(expressionExperiment.getProcessedExpressionDataVectors().size());
    this.getSessionFactory().getCurrentSession().update(expressionExperiment);
    assert expressionExperiment.getNumberOfDataVectors() != null;

    // Anything cached for this experiment was built from the vectors that were just replaced.
    this.processedDataVectorCache.clearCache(expressionExperiment.getId());
    return expressionExperiment;
}
use of ubic.gemma.model.expression.experiment.ExpressionExperiment in project Gemma by PavlidisLab.
the class ProcessedExpressionDataVectorDaoImpl method getBioAssayDimensions.
/**
 * Fetches the distinct bioassay dimensions that share a bioassay with the given data set.
 * <p>
 * For an {@link ExpressionExperiment} this runs a query in the current session and logs the timing when it takes
 * longer than 100 ms. For anything else (a subset), the experiment obtained from {@code getExperiment} is used
 * instead.
 *
 * @param ee the experiment or subset
 * @return the matching bioassay dimensions
 */
private Collection<BioAssayDimension> getBioAssayDimensions(BioAssaySet ee) {
    if (!(ee instanceof ExpressionExperiment)) {
        // subset.
        return this.getBioAssayDimensions(this.getExperiment(ee));
    }

    StopWatch stopWatch = new StopWatch();
    stopWatch.start();
    // this does not look efficient.
    List dimensions = this.getSessionFactory().getCurrentSession()
            .createQuery("select distinct bad from ExpressionExperiment e, BioAssayDimension bad" + " inner join e.bioAssays b inner join bad.bioAssays badba where e = :ee and b in (badba) ")
            .setParameter("ee", ee)
            .list();
    stopWatch.stop();

    // Only slow fetches are worth a log line.
    long elapsedMs = stopWatch.getTime();
    if (elapsedMs > 100) {
        AbstractDao.log.info("Fetch " + dimensions.size() + " bioassayDimensions for experiment id=" + ee.getId() + ": " + elapsedMs + "ms");
    }

    // noinspection unchecked
    return dimensions;
}
Aggregations