use of ubic.gemma.model.association.coexpression.ExperimentCoexpressionLink in project Gemma by PavlidisLab.
the class CoexpressionDaoImpl method saveExperimentLevelLinks.
/**
 * Saves the experiment-level links for one data set, in both gene orientations.
 * <p>
 * Pass one walks {@code links} in key order, creates and saves the link as (firstGene, secondGene), and at the same
 * time creates the flipped (secondGene, firstGene) link, which is only held in memory. Pass two sorts the flipped
 * links by first gene and saves them. The session is flushed and cleared every 1024 saves and once more at the end
 * of each pass.
 *
 * @param sess        session used to save, flush and clear
 * @param c           factory for the experiment-level link objects
 * @param links       links to store, keyed by the link ID that is written to both orientations
 * @param bioAssaySet data set the links belong to
 */
private void saveExperimentLevelLinks(Session sess, LinkCreator c, TreeMap<Long, NonPersistentNonOrderedCoexpLink> links, BioAssaySet bioAssaySet) {
    final int batchSize = 1024;
    final int logInterval = 50000;
    int progress = 0;
    // Exactly one flipped link is produced per input link, so the final size is known up front.
    List<ExperimentCoexpressionLink> flippedLinks = new ArrayList<>(links.size());
    // Iterate entries rather than keys so that each value does not cost a second tree lookup.
    for (Map.Entry<Long, NonPersistentNonOrderedCoexpLink> entry : links.entrySet()) {
        Long linkid = entry.getKey();
        NonPersistentNonOrderedCoexpLink link = entry.getValue();
        ExperimentCoexpressionLink ecl = c.createEELink(bioAssaySet, linkid, link.getFirstGene(), link.getSecondGene());
        /*
         * At same time, create flipped versions, but save them later for ordering. Notice that we use the SAME link
         * ID - not the one for the flipped version in the gene2gene table.
         *
         * Ideally we would ensure that the gene2gene link ID used is the same for all links that are between
         * the same pair of genes. That would let us be able to easily count the support directly from an
         * experiment-level query, without going to the supportDetails. I do not believe the current code guarantees
         * this.
         */
        flippedLinks.add(c.createEELink(bioAssaySet, linkid, link.getSecondGene(), link.getFirstGene()));
        sess.save(ecl);
        if (++progress % logInterval == 0) {
            CoexpressionDaoImpl.log.info("Created " + progress + "/" + links.size() + " experiment-level links...");
        }
        if (progress % batchSize == 0) {
            // Push the batch to the database and detach it so the session does not keep growing.
            sess.flush();
            sess.clear();
        }
    }
    sess.flush();
    sess.clear();
    /*
     * Sort the flipped links by the first gene
     */
    Collections.sort(flippedLinks, new Comparator<ExperimentCoexpressionLink>() {
        @Override
        public int compare(ExperimentCoexpressionLink o1, ExperimentCoexpressionLink o2) {
            return o1.getFirstGene().compareTo(o2.getFirstGene());
        }
    });
    /*
     * Save the flipped ones.
     */
    progress = 0;
    for (ExperimentCoexpressionLink fl : flippedLinks) {
        sess.save(fl);
        if (++progress % logInterval == 0) {
            CoexpressionDaoImpl.log.info("Created " + progress + "/" + links.size() + " flipped experiment-level links...");
        }
        if (progress % batchSize == 0) {
            sess.flush();
            sess.clear();
        }
    }
    // one for the road.
    sess.flush();
    sess.clear();
}
use of ubic.gemma.model.association.coexpression.ExperimentCoexpressionLink in project Gemma by PavlidisLab.
the class CoexpressionDaoImpl method getCoexpression.
/**
 * Fetches the coexpression links stored for a single data set.
 * <p>
 * The experiment-level rows for {@code experiment} are read first; their link IDs are then used to load the
 * gene-level links in batches of 1000, each of which is wrapped in a value object.
 *
 * @param taxon      selects the experiment-level and gene-level link classes to query
 * @param experiment data set whose links are wanted
 * @param quick      when false, the results are additionally passed to {@code populateTestedInDetails}
 * @return the value objects; an empty collection when the data set has no links
 */
@Override
@Transactional(readOnly = true)
public Collection<CoexpressionValueObject> getCoexpression(Taxon taxon, BioAssaySet experiment, boolean quick) {
    Session session = this.getSessionFactory().getCurrentSession();

    // could just fetch linkId.
    Query experimentLinkQuery = session.createQuery(" from " + CoexpressionQueryUtils.getExperimentLinkClassName(taxon) + " where experiment=:ee");
    experimentLinkQuery.setParameter("ee", experiment);
    List<ExperimentCoexpressionLink> experimentLinks = experimentLinkQuery.list();

    Collection<CoexpressionValueObject> valueObjects = new HashSet<>();
    if (experimentLinks.isEmpty()) {
        return valueObjects;
    }

    // Collect the gene-level link IDs referenced by the experiment-level rows.
    List<Long> linkIds = new ArrayList<>();
    for (ExperimentCoexpressionLink experimentLink : experimentLinks) {
        linkIds.add(experimentLink.getLinkId());
    }

    // Load the gene-level links batch by batch to keep each IN list bounded.
    String geneLinkHql = "from " + CoexpressionQueryUtils.getGeneLinkClassName(taxon) + " where id in (:ids)";
    BatchIterator<Long> idBatches = BatchIterator.batches(linkIds, 1000);
    while (idBatches.hasNext()) {
        Query geneLinkQuery = session.createQuery(geneLinkHql);
        List<Gene2GeneCoexpression> geneLinks = geneLinkQuery.setParameterList("ids", idBatches.next()).list();
        for (Gene2GeneCoexpression geneLink : geneLinks) {
            valueObjects.add(new CoexpressionValueObject(geneLink));
        }
    }

    if (!quick) {
        this.populateTestedInDetails(valueObjects);
    }
    return valueObjects;
}
use of ubic.gemma.model.association.coexpression.ExperimentCoexpressionLink in project Gemma by PavlidisLab.
the class CoexpressionDaoImpl method getCoexpressionFromCacheOrDbViaExperiments.
/**
 * Get links from the cache or the database, querying in experiment-first mode, but constrained to involve the given
 * genes. Does not do the trimming step, nor are the results guaranteed to meet the stringency set.
 *
 * @param t          taxon, used to select the experiment-level link class and passed on when loading links
 * @param genes      IDs of the query genes; asserted to be non-empty
 * @param bas        IDs of the data sets to query
 * @param stringency number of occurrences among the fetched experiment-level links a link ID needs in order to be
 *                   kept; asserted to be at most {@code bas.size()}
 * @param quick      passed through to {@code loadAndConvertLinks}
 * @return the map filled during the cache check if that left no genes to query; an empty map if no link reached
 *         the stringency; otherwise the result of {@code loadAndConvertLinks} for the kept link IDs
 */
private Map<Long, List<CoexpressionValueObject>> getCoexpressionFromCacheOrDbViaExperiments(Taxon t, Collection<Long> genes, Collection<Long> bas, int stringency, boolean quick) {
    assert stringency <= bas.size();
    assert !genes.isEmpty();
    Map<Long, List<CoexpressionValueObject>> results = new HashMap<>();
    /*
     * First, check the cache -- if the stringency is >= limit
     */
    Collection<Long> genesNeeded = new HashSet<>(genes);
    if (stringency >= CoexpressionCache.CACHE_QUERY_STRINGENCY) {
        genesNeeded = this.checkCache(genes, results);
        if (genesNeeded.isEmpty()) {
            return results;
        }
    }
    // NOTE(review): from here on 'results' is no longer returned, so anything checkCache put into it for genes that
    // did not need a query is presumably dropped from the return value - confirm this is intended.
    /*
     * Get all the data for all the experiments queried, constrained to involve the genes in question.
     *
     * This uses the ECL1EFK index, which is of (experiment, gene1, gene2). Note that if there are a lot of genes
     * this can get slow ...
     */
    Query q = this.getSessionFactory().getCurrentSession().createQuery(" from " + CoexpressionQueryUtils.getExperimentLinkClassName(t) + " where experiment.id in (:ees) and firstGene in (:genes)");
    // May need to batch over genes...
    BatchIterator<Long> it = BatchIterator.batches(bas, CoexpressionDaoImpl.BATCH_SIZE_SMALL);
    StopWatch timer = new StopWatch();
    timer.start();
    List<ExperimentCoexpressionLink> links = new ArrayList<>();
    while (it.hasNext()) {
        q.setParameterList("ees", it.next()).setParameterList("genes", genesNeeded);
        links.addAll(q.list());
    }
    if (timer.getTime() > 2000) {
        CoexpressionDaoImpl.log.info("Query for coexp for : " + genes.size() + " genes " + " in " + bas.size() + " experiments: " + timer.getTime() + "ms");
    }
    /*
     * Track the support for the links among the queried data sets as we go over this in experiment-major mode.
     */
    // noinspection MismatchedQueryAndUpdateOfCollection // We still need to compare it to stringency
    CountingMap<Long> supportCounts = new CountingMap<>();
    List<Long> keepers = new ArrayList<>();
    // Guards against adding the same link ID again on every increment past the threshold; keepers keeps the order
    // in which IDs first qualified.
    Collection<Long> alreadyKept = new HashSet<>();
    for (ExperimentCoexpressionLink link : links) {
        assert genes.contains(link.getFirstGene());
        Long linkId = link.getLinkId();
        // The count must be incremented for every row, so it is evaluated before the duplicate check.
        if (supportCounts.increment(linkId) >= stringency && alreadyKept.add(linkId)) {
            keepers.add(linkId);
        }
    }
    if (keepers.isEmpty()) {
        return new HashMap<>();
    }
    return this.loadAndConvertLinks(t, keepers, genes, quick);
}
Aggregations