Search in sources:

Example 6 with Gene2GeneCoexpression

use of ubic.gemma.model.association.coexpression.Gene2GeneCoexpression in project Gemma by PavlidisLab.

the class CoexpressionDaoImpl method getInterCoexpressionFromDbViaGenes.

/**
 * Fetches, directly from the database, the coexpression links in which both genes are members of the given set
 * of query genes. Does not check the cache - this must be done by the caller.
 *
 * @param taxon      used to select the gene link class that is queried
 * @param genes      IDs of the query genes; assumed to all be from the same taxon
 * @param stringency minimum number of supporting data sets a link must have to be returned
 * @param quick      if true, the tested-in details of the results are not populated
 * @return map of query gene ID to its links, each list sorted; empty if there are no genes or no links
 */
private Map<Long, List<CoexpressionValueObject>> getInterCoexpressionFromDbViaGenes(Taxon taxon, Collection<Long> genes, int stringency, boolean quick) {
    if (genes.isEmpty()) {
        return new HashMap<>();
    }
    // we assume the genes are from the same taxon. Confirmed: this uses the index (see bug 4055)
    String g2gClassName = CoexpressionQueryUtils.getGeneLinkClassName(taxon);
    final String firstQueryString = "select g2g from " + g2gClassName + " as g2g where g2g.firstGene in (:qgene) and g2g.secondGene in (:genes) " + "and g2g.numDataSetsSupporting  >= :stringency ";
    /*
     * Note: if the number of genes is too large, it may be faster to simply query without the second 'in' clause
     * and filter the results.
     */
    StopWatch oTimer = new StopWatch();
    oTimer.start();
    int batchSize = 32;
    BatchIterator<Long> it = BatchIterator.batches(genes, batchSize);
    List<CoexpressionValueObject> g2gs = new ArrayList<>(genes.size());
    Set<CoexpressionValueObject> seen = new HashSet<>();
    while (it.hasNext()) {
        Collection<Long> queryGeneBatch = it.next();
        StopWatch timer = new StopWatch();
        timer.start();
        Collection<Gene2GeneCoexpression> r = this.getHibernateTemplate().findByNamedParam(firstQueryString, new String[] { "qgene", "genes", "stringency" }, new Object[] { queryGeneBatch, genes, stringency });
        // read the elapsed time once so the threshold check and the logged value agree
        long queryMillis = timer.getTime();
        if (queryMillis > 5000) {
            CoexpressionDaoImpl.log.debug("Slow query: " + firstQueryString + " took " + queryMillis + "ms (" + queryGeneBatch.size() + " query gene batch, " + genes.size() + " target genes), Stringency=" + stringency);
        }
        // raw db results, for a batch of genes, add to the whole.
        for (Gene2GeneCoexpression g2g : r) {
            CoexpressionValueObject g2gvo = new CoexpressionValueObject(g2g);
            // might not be returned as query genes, since they show up in the 'coexpressed' gene instead.
            // Set.add returns false for a value that is already present, so duplicates are skipped here.
            if (!seen.add(g2gvo)) {
                continue;
            }
            g2gvo.setInterQueryLink(true);
            g2gs.add(g2gvo);
        }
    }
    if (!quick && !g2gs.isEmpty()) {
        StopWatch timer = new StopWatch();
        timer.start();
        this.populateTestedInDetails(g2gs);
        long detailsMillis = timer.getTime();
        if (detailsMillis > 2000) {
            CoexpressionDaoImpl.log.debug("Query genes only,fetch tested-in details " + g2gs.size() + " results took " + detailsMillis + "ms");
        }
    }
    /*
     * Group the links by query gene; all the genes are guaranteed to be in the query list.
     */
    Map<Long, List<CoexpressionValueObject>> results = new HashMap<>();
    for (CoexpressionValueObject g2g : g2gs) {
        Long queryGeneId = g2g.getQueryGeneId();
        List<CoexpressionValueObject> linksForGene = results.get(queryGeneId);
        if (linksForGene == null) {
            linksForGene = new ArrayList<>();
            results.put(queryGeneId, linksForGene);
        }
        linksForGene.add(g2g);
    }
    long totalMillis = oTimer.getTime();
    if (totalMillis > 2000) {
        CoexpressionDaoImpl.log.info("Query genes only, fetch for " + genes.size() + " genes took " + totalMillis + "ms");
    }
    for (List<CoexpressionValueObject> linksForGene : results.values()) {
        Collections.sort(linksForGene);
    }
    return results;
}
Also used : Gene2GeneCoexpression(ubic.gemma.model.association.coexpression.Gene2GeneCoexpression) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 7 with Gene2GeneCoexpression

use of ubic.gemma.model.association.coexpression.Gene2GeneCoexpression in project Gemma by PavlidisLab.

the class CoexpressionDaoImpl method saveBatchAndMakeFlipped.

/**
 * Persists a batch of <strong>new</strong> links along with their support details, and builds the gene-flipped
 * counterpart of each link so it can be persisted later.
 *
 * @param session session used for saving; it is flushed and cleared before this method returns
 * @param linkIds receives one entry per saved link, keyed by the ID returned from the save
 * @param batch   links to save, keyed by their support details; emptied by this call
 * @param c       used to create the flipped versions, so they are of the appropriate class
 * @return the flipped versions (not saved here), to be accumulated, sorted and saved by the caller
 */
private List<Gene2GeneCoexpression> saveBatchAndMakeFlipped(Session session, Map<Long, NonPersistentNonOrderedCoexpLink> linkIds, Map<SupportDetails, Gene2GeneCoexpression> batch, LinkCreator c) {
    StopWatch timer = new StopWatch();
    timer.start();

    List<Gene2GeneCoexpression> flippedLinks = new ArrayList<>();
    for (SupportDetails details : batch.keySet()) {
        // The lookup has to come before the save: adding the ID changes the hashcode of the key.
        Gene2GeneCoexpression link = batch.get(details);
        assert link != null;

        session.save(details);
        assert details.getNumIds() > 0;

        link.setSupportDetails(details);
        assert details.getNumIds() > 0;
        assert link.getNumDatasetsSupporting() > 0;
        assert link.getSupportDetails().getNumIds() > 0;

        // Copy of the link with first and second gene swapped; it shares the same support details.
        int correlationSign = link.isPositiveCorrelation() ? 1 : -1;
        Gene2GeneCoexpression flippedLink = c.create(correlationSign, link.getSecondGene(), link.getFirstGene());
        flippedLink.setSupportDetails(link.getSupportDetails());
        flippedLinks.add(flippedLink);

        assert flippedLink.getFirstGene().equals(link.getSecondGene());
        assert flippedLink.getSecondGene().equals(link.getFirstGene());
    }

    // Now save the links themselves, recording the ID assigned to each.
    for (Gene2GeneCoexpression link : batch.values()) {
        Long savedId = (Long) session.save(link);
        NonPersistentNonOrderedCoexpLink unorderedLink = new NonPersistentNonOrderedCoexpLink(link);
        linkIds.put(savedId, unorderedLink);
    }

    session.flush();
    session.clear();
    batch.clear();

    if (timer.getTime() > 1000) {
        CoexpressionDaoImpl.log.info("Saved batch: " + timer.getTime() + "ms");
    }
    return flippedLinks;
}
Also used : Gene2GeneCoexpression(ubic.gemma.model.association.coexpression.Gene2GeneCoexpression) StopWatch(org.apache.commons.lang3.time.StopWatch) SupportDetails(ubic.gemma.model.analysis.expression.coexpression.SupportDetails)

Example 8 with Gene2GeneCoexpression

use of ubic.gemma.model.association.coexpression.Gene2GeneCoexpression in project Gemma by PavlidisLab.

the class LinkAnalysisPersisterImpl method initializeLinksFromOldData.

/**
 * Creates stub links from the old link data for the genes of a taxon, one gene at a time.
 *
 * @param t taxon whose genes are loaded and processed
 * @throws IllegalStateException if every gene has too few old links to be worth processing
 */
@Override
public void initializeLinksFromOldData(Taxon t) {
    Collection<Gene> genes = geneService.loadAll(t);
    Map<Long, Gene> idMap = EntityUtils.getIdMap(genes);

    /*
     * The old links are counted per gene up front, as a separate step, because the set of genes with too few
     * links has to be known ahead of time: it is handed to the service so those genes are also recognized when
     * they turn up as the second gene. This might be more trouble than it is worth...
     */
    LinkAnalysisPersisterImpl.log.info("Counting old links for " + genes.size() + " genes.");
    Map<Gene, Integer> counts = gene2GeneCoexpressionService.countOldLinks(genes);

    // genes with fewer old links than this are skipped
    final int minOldLinks = 100;
    Set<Long> skipGenes = new HashSet<>();
    for (Gene candidate : counts.keySet()) {
        int oldLinkCount = counts.get(candidate);
        if (oldLinkCount < minOldLinks) {
            skipGenes.add(candidate.getId());
        }
    }

    if (skipGenes.size() == genes.size()) {
        throw new IllegalStateException("There weren't enough links to bother making any stubs.");
    }

    Map<NonPersistentNonOrderedCoexpLink, SupportDetails> linksSoFar = new HashMap<>();
    LinkAnalysisPersisterImpl.log.info("Creating stub links for up to " + genes.size() + " genes; " + skipGenes.size() + " genes will be ignored because they have too few links.");

    int genesProcessed = 0;
    int linksCreated = 0;
    for (Gene gene : genes) {
        Map<SupportDetails, Gene2GeneCoexpression> links = gene2GeneCoexpressionService.initializeLinksFromOldData(gene, idMap, linksSoFar, skipGenes);
        boolean nothingCreated = links == null || links.isEmpty();
        if (nothingCreated) {
            continue;
        }
        linksCreated += links.size();

        /*
         * Remember the links made so far, disregarding "direction", so their supportDetails can be reused.
         */
        for (SupportDetails sd : links.keySet()) {
            assert sd.getId() != null;
            Gene2GeneCoexpression g2g = links.get(sd);
            assert g2g.getId() != null;
            assert g2g.getSupportDetails() != null && g2g.getSupportDetails().getId() != null;
            assert sd.equals(g2g.getSupportDetails());

            NonPersistentNonOrderedCoexpLink unorderedLink = new NonPersistentNonOrderedCoexpLink(g2g.getFirstGene(), g2g.getSecondGene(), g2g.isPositiveCorrelation());
            if (!linksSoFar.containsKey(unorderedLink)) {
                linksSoFar.put(unorderedLink, sd);
            } else {
                // This link was already recorded - presumably from the other direction, so both directions are
                // now done (TODO confirm). Removing it will help us free up memory.
                assert sd.equals(linksSoFar.get(unorderedLink));
                linksSoFar.remove(unorderedLink);
            }
        }

        LinkAnalysisPersisterImpl.log.info(links.size() + " links created for " + gene + ", " + linksCreated + " links created so far.");
        genesProcessed++;
        if (genesProcessed % 500 == 0) {
            LinkAnalysisPersisterImpl.log.info("***** " + genesProcessed + " processed");
        }
    }
}
Also used : Gene2GeneCoexpression(ubic.gemma.model.association.coexpression.Gene2GeneCoexpression) SupportDetails(ubic.gemma.model.analysis.expression.coexpression.SupportDetails) Gene(ubic.gemma.model.genome.Gene) NonPersistentNonOrderedCoexpLink(ubic.gemma.persistence.service.association.coexpression.NonPersistentNonOrderedCoexpLink)

Example 9 with Gene2GeneCoexpression

use of ubic.gemma.model.association.coexpression.Gene2GeneCoexpression in project Gemma by PavlidisLab.

the class CoexpressionDaoImpl method getCoexpressionFromDbViaGenes.

/**
 * Gene-focused query. Use this if you don't care about which data sets are involved (or if there are many data
 * sets), for a relatively small number of genes. This DOES NOT cache the results, the caller has to do that. It
 * also does not check the cache.
 *
 * @param geneIds   the gene IDs
 * @param className the class name, passed on to the query builder
 * @return results without any limit on the size, each list is already sorted; an empty map if the query
 *         returned nothing
 */
private Map<Long, List<CoexpressionValueObject>> getCoexpressionFromDbViaGenes(Collection<Long> geneIds, String className) {
    Query q = this.buildQuery(geneIds, className);
    StopWatch timer = new StopWatch();
    timer.start();
    List<Gene2GeneCoexpression> rawResults = q.list();
    // read the elapsed time once so the threshold check and the logged value agree
    long queryMillis = timer.getTime();
    if (queryMillis > 1000) {
        // note the leading space in " genes took": without it the count ran into the word ("5genes took")
        CoexpressionDaoImpl.log.debug("Initial coexp query for " + geneIds.size() + " genes took " + queryMillis + "ms: " + rawResults.size() + " results");
        CoexpressionDaoImpl.log.debug("Query was: " + q.getQueryString());
    }
    if (rawResults.isEmpty()) {
        return new HashMap<>();
    }
    // restart the timer to measure the conversion and sorting separately from the query
    timer.reset();
    timer.start();
    Map<Long, List<CoexpressionValueObject>> results = this.convertToValueObjects(rawResults, geneIds);
    for (List<CoexpressionValueObject> linksForGene : results.values()) {
        Collections.sort(linksForGene);
    }
    long convertMillis = timer.getTime();
    if (convertMillis > 100) {
        CoexpressionDaoImpl.log.debug("Convert to value objects, filter, sort and finish " + rawResults.size() + " results: " + convertMillis + "ms");
    }
    return results;
}
Also used : Gene2GeneCoexpression(ubic.gemma.model.association.coexpression.Gene2GeneCoexpression) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 10 with Gene2GeneCoexpression

use of ubic.gemma.model.association.coexpression.Gene2GeneCoexpression in project Gemma by PavlidisLab.

the class CoexpressionDaoImpl method getCoexpression.

/**
 * Retrieves the coexpression links associated with one experiment.
 *
 * @param taxon      used to select the experiment-link and gene-link classes to query
 * @param experiment the experiment whose links are wanted
 * @param quick      if true, the tested-in details of the results are not populated
 * @return the links as value objects; empty if the experiment has no links
 */
@Override
@Transactional(readOnly = true)
public Collection<CoexpressionValueObject> getCoexpression(Taxon taxon, BioAssaySet experiment, boolean quick) {
    Session sess = this.getSessionFactory().getCurrentSession();

    // could just fetch linkId.
    String experimentLinkQuery = " from " + CoexpressionQueryUtils.getExperimentLinkClassName(taxon) + " where experiment=:ee";
    List<ExperimentCoexpressionLink> links = sess.createQuery(experimentLinkQuery).setParameter("ee", experiment).list();
    if (links.isEmpty()) {
        return new HashSet<>();
    }

    // collect the IDs of the gene links referenced by the experiment-level links
    List<Long> linksToFetch = new ArrayList<>(links.size());
    for (ExperimentCoexpressionLink link : links) {
        linksToFetch.add(link.getLinkId());
    }

    // fetch the gene links by ID, at most 1000 IDs per query
    String geneLinkQuery = "from " + CoexpressionQueryUtils.getGeneLinkClassName(taxon) + " where id in (:ids)";
    Collection<CoexpressionValueObject> results = new HashSet<>();
    BatchIterator<Long> idBatches = BatchIterator.batches(linksToFetch, 1000);
    while (idBatches.hasNext()) {
        Query batchQuery = sess.createQuery(geneLinkQuery);
        batchQuery.setParameterList("ids", idBatches.next());
        List<Gene2GeneCoexpression> rawResults = batchQuery.list();
        for (Gene2GeneCoexpression g2g : rawResults) {
            results.add(new CoexpressionValueObject(g2g));
        }
    }

    if (!quick) {
        this.populateTestedInDetails(results);
    }
    return results;
}
Also used : ExperimentCoexpressionLink(ubic.gemma.model.association.coexpression.ExperimentCoexpressionLink) Gene2GeneCoexpression(ubic.gemma.model.association.coexpression.Gene2GeneCoexpression) Transactional(org.springframework.transaction.annotation.Transactional)

Aggregations

Gene2GeneCoexpression (ubic.gemma.model.association.coexpression.Gene2GeneCoexpression)11 SupportDetails (ubic.gemma.model.analysis.expression.coexpression.SupportDetails)5 StopWatch (org.apache.commons.lang3.time.StopWatch)4 Transactional (org.springframework.transaction.annotation.Transactional)2 Gene (ubic.gemma.model.genome.Gene)2 BigInteger (java.math.BigInteger)1 IdArrayValueObject (ubic.gemma.model.analysis.expression.coexpression.IdArrayValueObject)1 ExperimentCoexpressionLink (ubic.gemma.model.association.coexpression.ExperimentCoexpressionLink)1 GeneCoexpressionNodeDegreeValueObject (ubic.gemma.model.association.coexpression.GeneCoexpressionNodeDegreeValueObject)1 NonPersistentNonOrderedCoexpLink (ubic.gemma.persistence.service.association.coexpression.NonPersistentNonOrderedCoexpLink)1