use of ubic.gemma.model.association.coexpression.Gene2GeneCoexpression in project Gemma by PavlidisLab.
the class CoexpressionDaoImpl method getInterCoexpressionFromDbViaGenes.
/**
 * Fetches, straight from the database, the coexpression links for which both the first and the second gene are in
 * the supplied gene set and whose number of supporting data sets is at least the given stringency.
 * <p>
 * Does not check the cache - this must be done by the caller.
 *
 * @param taxon      used to pick the gene-link class that is queried
 * @param genes      IDs of the genes; used both as query genes (in batches) and as the allowed second genes
 * @param stringency minimum value of numDataSetsSupporting for a link to be returned
 * @param quick      if true, the tested-in details are not populated
 * @return links grouped by query gene ID, each list sorted with {@link Collections#sort(List)}; an empty map if
 *         no genes were given
 */
private Map<Long, List<CoexpressionValueObject>> getInterCoexpressionFromDbViaGenes(Taxon taxon, Collection<Long> genes, int stringency, boolean quick) {
    if (genes.isEmpty()) {
        return new HashMap<>();
    }

    // we assume the genes are from the same taxon. Confirmed: this uses the index (see bug 4055)
    String g2gClassName = CoexpressionQueryUtils.getGeneLinkClassName(taxon);
    final String firstQueryString = "select g2g from " + g2gClassName
            + " as g2g where g2g.firstGene in (:qgene) and g2g.secondGene in (:genes) "
            + "and g2g.numDataSetsSupporting >= :stringency ";

    /*
     * Note: if the number of genes is too large, it may be faster to simply query without the second 'in' clause
     * and filter the results.
     */
    StopWatch oTimer = new StopWatch();
    oTimer.start();

    int batchSize = 32;
    List<CoexpressionValueObject> g2gs = new ArrayList<>(genes.size());
    Set<CoexpressionValueObject> seen = new HashSet<>();

    BatchIterator<Long> batches = BatchIterator.batches(genes, batchSize);
    while (batches.hasNext()) {
        Collection<Long> queryGeneBatch = batches.next();

        StopWatch timer = new StopWatch();
        timer.start();
        Collection<Gene2GeneCoexpression> rawLinks = this.getHibernateTemplate().findByNamedParam(
                firstQueryString,
                new String[] { "qgene", "genes", "stringency" },
                new Object[] { queryGeneBatch, genes, stringency });
        if (timer.getTime() > 5000) {
            CoexpressionDaoImpl.log.debug("Slow query: " + firstQueryString + " took " + timer.getTime() + "ms (" + queryGeneBatch.size() + " query gene batch, " + genes.size() + " target genes), Stringency=" + stringency);
        }

        // raw db results for this batch of genes; fold them into the overall list, skipping duplicates.
        for (Gene2GeneCoexpression rawLink : rawLinks) {
            CoexpressionValueObject vo = new CoexpressionValueObject(rawLink);
            // might not be returned as query genes, since they show up in the 'coexpressed' gene instead.
            if (!seen.add(vo)) {
                continue;
            }
            vo.setInterQueryLink(true);
            g2gs.add(vo);
        }
    }

    if (!quick && !g2gs.isEmpty()) {
        StopWatch timer = new StopWatch();
        timer.start();
        this.populateTestedInDetails(g2gs);
        if (timer.getTime() > 2000) {
            CoexpressionDaoImpl.log.debug("Query genes only,fetch tested-in details " + g2gs.size() + " results took " + timer.getTime() + "ms");
        }
        // The timer is restarted here but is not read again before it goes out of scope.
        timer.reset();
        timer.start();
    }

    /*
     * Group by query gene; all the genes are guaranteed to be in the query list.
     */
    Map<Long, List<CoexpressionValueObject>> results = new HashMap<>();
    for (CoexpressionValueObject link : g2gs) {
        Long queryGeneId = link.getQueryGeneId();
        List<CoexpressionValueObject> linksForGene = results.get(queryGeneId);
        if (linksForGene == null) {
            linksForGene = new ArrayList<CoexpressionValueObject>();
            results.put(queryGeneId, linksForGene);
        }
        linksForGene.add(link);
    }

    if (oTimer.getTime() > 2000) {
        CoexpressionDaoImpl.log.info("Query genes only, fetch for " + genes.size() + " genes took " + oTimer.getTime() + "ms");
    }

    for (List<CoexpressionValueObject> linksForGene : results.values()) {
        Collections.sort(linksForGene);
    }
    return results;
}
use of ubic.gemma.model.association.coexpression.Gene2GeneCoexpression in project Gemma by PavlidisLab.
the class CoexpressionDaoImpl method saveBatchAndMakeFlipped.
/**
 * Save a batch of <strong>new</strong> links, and construct the to-be-persisted flipped versions (same
 * correlation sign and support details, first and second gene swapped).
 *
 * @param session session used to save the support details and the links; it is flushed and cleared at the end
 * @param linkIds will be updated with the ids of the links which were saved.
 * @param batch   links to save, keyed by their support details; will be cleared by this call.
 * @param c       to create flipped versions of appropriate class
 * @return flipped versions which we will accumulate, sort and save later.
 */
private List<Gene2GeneCoexpression> saveBatchAndMakeFlipped(Session session, Map<Long, NonPersistentNonOrderedCoexpLink> linkIds, Map<SupportDetails, Gene2GeneCoexpression> batch, LinkCreator c) {
    StopWatch timer = new StopWatch();
    timer.start();

    List<Gene2GeneCoexpression> flipped = new ArrayList<>();
    for (SupportDetails details : batch.keySet()) {
        // The lookup has to happen before the save, otherwise adding the ID changes the hashcode...
        Gene2GeneCoexpression link = batch.get(details);
        assert link != null;

        session.save(details);
        assert details.getNumIds() > 0;

        link.setSupportDetails(details);
        assert details.getNumIds() > 0;
        assert link.getNumDatasetsSupporting() > 0;
        assert link.getSupportDetails().getNumIds() > 0;

        // make a copy that has the genes flipped; reuse the supportDetails.
        int sign;
        if (link.isPositiveCorrelation()) {
            sign = 1;
        } else {
            sign = -1;
        }
        Gene2GeneCoexpression mirrored = c.create(sign, link.getSecondGene(), link.getFirstGene());
        mirrored.setSupportDetails(link.getSupportDetails());
        flipped.add(mirrored);

        assert mirrored.getFirstGene().equals(link.getSecondGene());
        assert mirrored.getSecondGene().equals(link.getFirstGene());
    }

    // Now persist the links themselves and record the ID each one was saved under.
    for (Gene2GeneCoexpression link : batch.values()) {
        Long savedId = (Long) session.save(link);
        linkIds.put(savedId, new NonPersistentNonOrderedCoexpLink(link));
    }

    session.flush();
    session.clear();
    batch.clear();

    if (timer.getTime() > 1000) {
        CoexpressionDaoImpl.log.info("Saved batch: " + timer.getTime() + "ms");
    }
    return flipped;
}
use of ubic.gemma.model.association.coexpression.Gene2GeneCoexpression in project Gemma by PavlidisLab.
the class LinkAnalysisPersisterImpl method initializeLinksFromOldData.
/**
 * Creates stub links from old link data for the genes of the given taxon. Genes with fewer than 100 old links
 * are collected into a skip set that is passed to the service.
 *
 * @param t taxon whose genes are loaded and processed
 * @throws IllegalStateException if the skip set is as large as the full gene collection
 */
@Override
public void initializeLinksFromOldData(Taxon t) {
    Collection<Gene> genes = geneService.loadAll(t);
    Map<Long, Gene> idMap = EntityUtils.getIdMap(genes);

    /*
     * First count the old links for every gene, and remove genes that have too few. That set of genes has to be
     * passed in to the service so they would be recognized in the second gene. We have to do that counting as a
     * separate step because we need to know ahead of time. This might be more trouble than it is worth...
     */
    LinkAnalysisPersisterImpl.log.info("Counting old links for " + genes.size() + " genes.");
    Map<Gene, Integer> counts = gene2GeneCoexpressionService.countOldLinks(genes);

    int LIMIT = 100;
    Set<Long> skipGenes = new HashSet<>();
    for (Map.Entry<Gene, Integer> geneCount : counts.entrySet()) {
        if (geneCount.getValue() < LIMIT) {
            skipGenes.add(geneCount.getKey().getId());
        }
    }

    if (skipGenes.size() == genes.size()) {
        throw new IllegalStateException("There weren't enough links to bother making any stubs.");
    }

    Map<NonPersistentNonOrderedCoexpLink, SupportDetails> linksSoFar = new HashMap<>();
    LinkAnalysisPersisterImpl.log.info("Creating stub links for up to " + genes.size() + " genes; " + skipGenes.size() + " genes will be ignored because they have too few links.");

    int numGenes = 0;
    int count = 0;
    for (Gene gene : genes) {
        Map<SupportDetails, Gene2GeneCoexpression> links = gene2GeneCoexpressionService.initializeLinksFromOldData(gene, idMap, linksSoFar, skipGenes);
        if (links == null || links.isEmpty()) {
            // Nothing was created for this gene; it is not counted as processed.
            continue;
        }
        count += links.size();

        /*
         * Keep track of links created so far (ignoring "direction") so we can reuse the supportDetails.
         */
        for (SupportDetails details : links.keySet()) {
            assert details.getId() != null;
            Gene2GeneCoexpression link = links.get(details);
            assert link.getId() != null;
            assert link.getSupportDetails() != null && link.getSupportDetails().getId() != null;
            assert details.equals(link.getSupportDetails());

            NonPersistentNonOrderedCoexpLink linkVO = new NonPersistentNonOrderedCoexpLink(link.getFirstGene(), link.getSecondGene(), link.isPositiveCorrelation());
            if (!linksSoFar.containsKey(linkVO)) {
                linksSoFar.put(linkVO, details);
                continue;
            }

            // This link (direction ignored) was already recorded, so drop it from the tracking map.
            // Removing it will help us free up memory.
            assert details.equals(linksSoFar.get(linkVO));
            linksSoFar.remove(linkVO);
        }

        LinkAnalysisPersisterImpl.log.info(links.size() + " links created for " + gene + ", " + count + " links created so far.");

        numGenes++;
        if (numGenes % 500 == 0) {
            LinkAnalysisPersisterImpl.log.info("***** " + numGenes + " processed");
        }
    }
}
use of ubic.gemma.model.association.coexpression.Gene2GeneCoexpression in project Gemma by PavlidisLab.
the class CoexpressionDaoImpl method getCoexpressionFromDbViaGenes.
/**
 * Gene-focused query. Use this if you don't care about which data sets are involved (or if there are many data
 * sets), for a relatively small number of genes. This DOES NOT cache the results, the caller has to do that. It
 * also does not check the cache.
 *
 * @param geneIds   the gene IDs
 * @param className the class name
 * @return results without any limit on the size, each list is already sorted; an empty map if the query returned
 *         no rows.
 */
private Map<Long, List<CoexpressionValueObject>> getCoexpressionFromDbViaGenes(Collection<Long> geneIds, String className) {
    Query q = this.buildQuery(geneIds, className);

    StopWatch timer = new StopWatch();
    timer.start();
    List<Gene2GeneCoexpression> rawResults = q.list();
    if (timer.getTime() > 1000) {
        // The gene count is separated from the following word by a space so the message is readable.
        CoexpressionDaoImpl.log.debug("Initial coexp query for " + geneIds.size() + " genes took " + timer.getTime() + "ms: " + rawResults.size() + " results");
        CoexpressionDaoImpl.log.debug("Query was: " + q.getQueryString());
    }

    if (rawResults.isEmpty()) {
        return new HashMap<>();
    }

    // Time the post-processing (conversion and sorting) separately from the query.
    timer.reset();
    timer.start();
    Map<Long, List<CoexpressionValueObject>> results = this.convertToValueObjects(rawResults, geneIds);
    for (List<CoexpressionValueObject> linksForGene : results.values()) {
        Collections.sort(linksForGene);
    }
    if (timer.getTime() > 100) {
        CoexpressionDaoImpl.log.debug("Convert to value objects, filter, sort and finish " + rawResults.size() + " results: " + timer.getTime() + "ms");
    }
    return results;
}
use of ubic.gemma.model.association.coexpression.Gene2GeneCoexpression in project Gemma by PavlidisLab.
the class CoexpressionDaoImpl method getCoexpression.
/**
 * Fetches the coexpression links associated with one experiment: first the experiment-level links are loaded to
 * obtain the link IDs, then the gene-to-gene links are fetched by ID in batches of 1000 and converted to value
 * objects.
 *
 * @param taxon      used to pick the experiment-link and gene-link classes that are queried
 * @param experiment the experiment whose links are wanted
 * @param quick      if true, the tested-in details are not populated
 * @return the value objects as a set; empty if the experiment has no links
 */
@Override
@Transactional(readOnly = true)
public Collection<CoexpressionValueObject> getCoexpression(Taxon taxon, BioAssaySet experiment, boolean quick) {
    Session sess = this.getSessionFactory().getCurrentSession();

    // could just fetch linkId.
    Query q = sess.createQuery(" from " + CoexpressionQueryUtils.getExperimentLinkClassName(taxon) + " where experiment=:ee");
    q.setParameter("ee", experiment);
    List<ExperimentCoexpressionLink> links = q.list();

    Collection<CoexpressionValueObject> results = new HashSet<>();
    if (links.isEmpty()) {
        return results;
    }

    List<Long> linksToFetch = new ArrayList<>(links.size());
    for (ExperimentCoexpressionLink link : links) {
        linksToFetch.add(link.getLinkId());
    }

    String q2 = "from " + CoexpressionQueryUtils.getGeneLinkClassName(taxon) + " where id in (:ids)";
    BatchIterator<Long> it = BatchIterator.batches(linksToFetch, 1000);
    while (it.hasNext()) {
        List<Gene2GeneCoexpression> rawResults = sess.createQuery(q2).setParameterList("ids", it.next()).list();
        for (Gene2GeneCoexpression g2g : rawResults) {
            results.add(new CoexpressionValueObject(g2g));
        }
    }

    // Only populate details when there is something to populate; this is the same guard that
    // getInterCoexpressionFromDbViaGenes applies before calling populateTestedInDetails.
    if (!quick && !results.isEmpty()) {
        this.populateTestedInDetails(results);
    }
    return results;
}
Aggregations