Search in sources :

Example 1 with GeneCoexpressionTestedIn

use of ubic.gemma.model.analysis.expression.coexpression.GeneCoexpressionTestedIn in project Gemma by PavlidisLab.

the class CoexpressionDaoImpl method updatedTestedIn.

/**
 * Records, for every supplied gene, that it was tested for coexpression in the given data set. Genes that already
 * have a {@link GeneCoexpressionTestedIn} row get that row updated; genes without one get a new row saved. Meant to
 * be invoked at the tail end of coexpression analysis for the data set.
 *
 * @param ee          the data set whose id is added to each gene's tested-in record
 * @param genesTested the genes that were tested in the data set
 */
private void updatedTestedIn(BioAssaySet ee, Collection<Gene> genesTested) {
    Session session = this.getSessionFactory().getCurrentSession();
    Query existingRecordsQuery = session.createQuery("from GeneCoexpressionTestedIn where geneId in (:ids)");
    Collection<Long> pendingGeneIds = EntityUtils.getIds(genesTested);
    Set<Long> updatedGeneIds = new HashSet<>();

    // Pass 1: update the records that already exist, querying for them in batches of 512 gene ids.
    BatchIterator<Long> idBatches = new BatchIterator<>(pendingGeneIds, 512);
    while (idBatches.hasNext()) {
        existingRecordsQuery.setParameterList("ids", idBatches.next());
        List<GeneCoexpressionTestedIn> existingRecords = existingRecordsQuery.list();
        int processed = 0;
        for (GeneCoexpressionTestedIn record : existingRecords) {
            record.addEntity(ee.getId());
            session.update(record);
            assert record.isIncluded(ee.getId());
            updatedGeneIds.add(record.getGeneId());
            processed++;
            // push pending changes and empty the session every 256 records
            if (processed % 256 == 0) {
                session.flush();
                session.clear();
            }
        }
    }

    if (!updatedGeneIds.isEmpty()) {
        CoexpressionDaoImpl.log.info("Updated tested-in information for " + updatedGeneIds.size() + " genes");
        // TODO do it just for the genes changed.
        this.geneTestedInCache.clearCache();
    }
    session.flush();
    session.clear();

    // Pass 2: whatever is left after removing the updated genes has no record at all yet.
    pendingGeneIds.removeAll(updatedGeneIds);
    if (!pendingGeneIds.isEmpty()) {
        CoexpressionDaoImpl.log.info("Adding tested-in information for " + pendingGeneIds.size() + " genes");
        int saved = 0;
        for (Long geneId : pendingGeneIds) {
            GeneCoexpressionTestedIn record = new GeneCoexpressionTestedIn(geneId);
            record.addEntity(ee.getId());
            assert record.isIncluded(ee.getId());
            assert record.getNumIds() == 1;
            session.save(record);
            saved++;
            if (saved % 256 == 0) {
                session.flush();
                session.clear();
            }
        }
    }
}
Also used : GeneCoexpressionTestedIn(ubic.gemma.model.analysis.expression.coexpression.GeneCoexpressionTestedIn) BatchIterator(ubic.basecode.util.BatchIterator)

Example 2 with GeneCoexpressionTestedIn

use of ubic.gemma.model.analysis.expression.coexpression.GeneCoexpressionTestedIn in project Gemma by PavlidisLab.

the class CoexpressionDaoImpl method removeTestedIn.

/**
 * Removes the experiment from the "genes-tested-in" records. Which genes reference the experiment is not known
 * ahead of time, so the record of every gene of the taxon is visited, in batches of 1000 gene ids.
 *
 * @param t          the taxon whose genes are checked
 * @param experiment the data set whose id is removed from each tested-in record
 */
private void removeTestedIn(Taxon t, BioAssaySet experiment) {
    Session session = this.getSessionFactory().getCurrentSession();
    List<Long> taxonGeneIds = session.createQuery("select id from Gene where taxon = :t")
            .setParameter("t", t)
            .list();
    CoexpressionDaoImpl.log.info("Removing 'tested-in' information for up to " + taxonGeneIds.size() + " genes for " + experiment);

    BatchIterator<Long> idBatches = BatchIterator.batches(taxonGeneIds, 1000);
    while (idBatches.hasNext()) {
        Collection<Long> batchIds = idBatches.next();
        Collection<GeneCoexpressionTestedIn> records = (Collection<GeneCoexpressionTestedIn>) session
                .createQuery("from GeneCoexpressionTestedIn where geneId in (:ids)")
                .setParameterList("ids", batchIds)
                .list();
        for (GeneCoexpressionTestedIn record : records) {
            // note this might be a no-op.
            record.removeEntity(experiment.getId());
            session.update(record);
        }
        // flush and empty the session once per batch
        session.flush();
        session.clear();
    }
}
Also used : GeneCoexpressionTestedIn(ubic.gemma.model.analysis.expression.coexpression.GeneCoexpressionTestedIn)

Example 3 with GeneCoexpressionTestedIn

use of ubic.gemma.model.analysis.expression.coexpression.GeneCoexpressionTestedIn in project Gemma by PavlidisLab.

the class CoexpressionDaoImpl method populateTestedInDetails.

/**
 * Sets the "tested in" data sets on each link, computed by combining the tested-in records of the link's query gene
 * and coexpressed gene via {@code andSet} (presumably the data sets both genes were tested in -- confirm against
 * {@link GeneCoexpressionTestedIn}). Records are taken from the cache when present; the remainder are loaded from
 * the database in batches and then added to the cache.
 *
 * @param g2gLinks the links to populate; must not be empty (checked by assertion)
 * @throws IllegalStateException if either gene of a link has no tested-in record, if either record lists no data
 *                               sets, or if combining the two records yields an empty set
 */
private void populateTestedInDetails(Collection<CoexpressionValueObject> g2gLinks) {
    assert !g2gLinks.isEmpty();
    StopWatch timer = new StopWatch();
    timer.start();

    // GeneCoexpressionTestedIn are one-per-gene, so each unique gene is resolved only once: either found in the
    // cache (gcTestedIn) or queued for a database lookup (genes).
    Map<Long, GeneCoexpressionTestedIn> gcTestedIn = new HashMap<>();
    Set<Long> genes = new HashSet<>();
    for (CoexpressionValueObject link : g2gLinks) {
        for (Long geneId : new Long[] { link.getQueryGeneId(), link.getCoexGeneId() }) {
            if (gcTestedIn.containsKey(geneId) || genes.contains(geneId)) {
                // already resolved from the cache, or already queued for the database
                continue;
            }
            GeneCoexpressionTestedIn cached = geneTestedInCache.get(geneId);
            if (cached == null) {
                genes.add(geneId);
            } else {
                gcTestedIn.put(geneId, cached);
            }
        }
    }

    if (!genes.isEmpty()) {
        // fetch the GeneCoexpressionTestedIn information for those genes which were not cached.
        Query q = this.getSessionFactory().getCurrentSession().createQuery("from GeneCoexpressionTestedIn g where geneId in (:genes)");
        int batchSize = 512;
        int numBatches = 0;
        for (BatchIterator<Long> it = BatchIterator.batches(genes, batchSize); it.hasNext(); ) {
            Collection<Long> batch = it.next();
            q.setParameterList("genes", batch);
            List<GeneCoexpressionTestedIn> fetched = q.list();
            Map<Long, GeneCoexpressionTestedIn> byGeneId = EntityUtils.getIdMap(fetched, "getGeneId");
            geneTestedInCache.cache(byGeneId);
            gcTestedIn.putAll(byGeneId);
            ++numBatches;
        }
        // only report slow lookups
        if (timer.getTime() > 1000) {
            CoexpressionDaoImpl.log.debug("Query for tested-in details for " + genes.size() + " genes: " + timer.getTime() + " ms (" + numBatches + " batches), values fetched or from cache size=" + gcTestedIn.size());
        }
    }

    timer.reset();
    timer.start();
    // copy it into the g2g value objects.
    for (CoexpressionValueObject g2g : g2gLinks) {
        assert g2g.getNumDatasetsSupporting() > 0 : g2g + " has support less than 1";
        GeneCoexpressionTestedIn queryTestedIn = gcTestedIn.get(g2g.getQueryGeneId());
        GeneCoexpressionTestedIn coexTestedIn = gcTestedIn.get(g2g.getCoexGeneId());
        if (queryTestedIn == null || coexTestedIn == null) {
            throw new IllegalStateException("Was missing GeneCoexpressionTestedIn data for genes in " + g2g);
        }
        if (queryTestedIn.getNumDatasetsTestedIn() == 0 || coexTestedIn.getNumDatasetsTestedIn() == 0) {
            throw new IllegalStateException(g2g + ": had no data sets tested in: " + StringUtils.join(queryTestedIn.getIds(), ",") + " :: " + StringUtils.join(coexTestedIn.getIds(), ","));
        }
        Set<Long> testedIn = queryTestedIn.andSet(coexTestedIn);
        if (testedIn.isEmpty()) {
            throw new IllegalStateException(g2g + ": had no data sets tested in: " + StringUtils.join(queryTestedIn.getIds(), ",") + " :: " + StringUtils.join(coexTestedIn.getIds(), ","));
        }
        g2g.setTestedInDatasets(testedIn);
    }
    // only report slow copies
    if (timer.getTime() > 100) {
        CoexpressionDaoImpl.log.debug("Populate into value objects: " + timer.getTime() + "ms (" + g2gLinks.size() + " links)");
    }
}
Also used : StopWatch(org.apache.commons.lang3.time.StopWatch) GeneCoexpressionTestedIn(ubic.gemma.model.analysis.expression.coexpression.GeneCoexpressionTestedIn)

Aggregations

GeneCoexpressionTestedIn (ubic.gemma.model.analysis.expression.coexpression.GeneCoexpressionTestedIn): 3, StopWatch (org.apache.commons.lang3.time.StopWatch): 1, BatchIterator (ubic.basecode.util.BatchIterator): 1