Search in sources :

Example 1 with BatchIterator

use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.

the class DesignElementDataVectorDaoImpl method getVectorsForProbesInExperiments.

/**
 * Fetches processed data vectors for the given probes, restricted to one experiment. The probe ids (the keys of
 * {@code cs2gene}) are processed in batches of 100; each batch is handed to {@code getVectorsBatch} together with
 * the prepared query and the result map.
 *
 * @param ee      id the vectors' expression experiment must match
 * @param cs2gene Map of probes to genes.
 * @return map of vectors to gene ids.
 */
Map<T, Collection<Long>> getVectorsForProbesInExperiments(Long ee, Map<Long, Collection<Long>> cs2gene) {
    // The experiment is matched with '=' and not an in clause, as an in clause can't use the indices
    // language=HQL
    String hql = "select dedv, dedv.designElement.id from ProcessedExpressionDataVector dedv fetch all properties"
            + " where dedv.designElement.id in ( :cs ) and dedv.expressionExperiment.id = :eeId ";

    // Read-only query with manual flushing on the current session.
    Session currentSession = this.getSessionFactory().getCurrentSession();
    org.hibernate.Query vectorQuery = currentSession.createQuery(hql);
    vectorQuery.setReadOnly(true);
    vectorQuery.setFlushMode(FlushMode.MANUAL);

    Map<T, Collection<Long>> vectorsToGenes = new HashMap<>();

    StopWatch stopwatch = new StopWatch();
    stopwatch.start();
    vectorQuery.setLong("eeId", ee);

    final int probesPerBatch = 100;
    for (Collection<Long> probeBatch : new BatchIterator<>(cs2gene.keySet(), probesPerBatch)) {
        this.getVectorsBatch(cs2gene, vectorQuery, vectorsToGenes, probeBatch);
    }

    // Only report fetches that are slow relative to the number of vectors retrieved (floor of 200 ms).
    int slowThresholdMs = Math.max(200, 20 * vectorsToGenes.size());
    if (stopwatch.getTime() > slowThresholdMs) {
        AbstractDao.log.info("Fetched " + vectorsToGenes.size() + " vectors for " + cs2gene.size() + " probes in "
                + stopwatch.getTime() + "ms\n" + "Vector query was: " + hql);
    }
    return vectorsToGenes;
}
Also used : HashMap(java.util.HashMap) BatchIterator(ubic.basecode.util.BatchIterator) org.hibernate(org.hibernate) StopWatch(org.apache.commons.lang3.time.StopWatch) Collection(java.util.Collection)

Example 2 with BatchIterator

use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.

the class DesignElementDataVectorDaoImpl method getVectorsForProbesInExperiments.

/**
 * Fetches processed data vectors for the given probes without restricting by experiment. The probe ids (the keys
 * of {@code cs2gene}) are processed in batches of 100; each batch is handed to {@code getVectorsBatch} together
 * with the prepared query and the result map.
 *
 * @param cs2gene map keyed by probe id; the keys drive the query and the whole map is passed to the batch helper
 * @return the map that was passed to {@code getVectorsBatch} for every batch
 */
Map<T, Collection<Long>> getVectorsForProbesInExperiments(Map<Long, Collection<Long>> cs2gene) {
    // language=HQL
    String hql = "select dedv, dedv.designElement.id from ProcessedExpressionDataVector dedv fetch all properties"
            + " where dedv.designElement.id in ( :cs ) ";

    // Read-only query with manual flushing on the current session.
    Session currentSession = this.getSessionFactory().getCurrentSession();
    org.hibernate.Query vectorQuery = currentSession.createQuery(hql);
    vectorQuery.setReadOnly(true);
    vectorQuery.setFlushMode(FlushMode.MANUAL);

    Map<T, Collection<Long>> vectorsToGenes = new HashMap<>();

    StopWatch stopwatch = new StopWatch();
    stopwatch.start();

    final int probesPerBatch = 100;
    for (Collection<Long> probeBatch : new BatchIterator<>(cs2gene.keySet(), probesPerBatch)) {
        this.getVectorsBatch(cs2gene, vectorQuery, vectorsToGenes, probeBatch);
    }

    // Only report fetches that are slow relative to the number of vectors retrieved (floor of 200 ms).
    int slowThresholdMs = Math.max(200, 20 * vectorsToGenes.size());
    if (stopwatch.getTime() > slowThresholdMs) {
        AbstractDao.log.info("Fetched " + vectorsToGenes.size() + " vectors for " + cs2gene.size() + " probes in "
                + stopwatch.getTime() + "ms\n" + "Vector query was: " + hql);
    }
    return vectorsToGenes;
}
Also used : HashMap(java.util.HashMap) BatchIterator(ubic.basecode.util.BatchIterator) org.hibernate(org.hibernate) StopWatch(org.apache.commons.lang3.time.StopWatch) Collection(java.util.Collection)

Example 3 with BatchIterator

use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.

the class GeneDaoImpl method loadThawed.

/**
 * Loads the genes with the given ids, working through the ids in batches of {@code GeneDaoImpl.BATCH_SIZE} and
 * delegating each batch to {@code doLoadThawedLite}.
 *
 * @param ids ids of the genes to load
 * @return the genes collected from all batches; an empty collection when {@code ids} is empty
 */
@Override
public Collection<Gene> loadThawed(Collection<Long> ids) {
    Collection<Gene> thawedGenes = new HashSet<>();

    if (!ids.isEmpty()) {
        StopWatch stopwatch = new StopWatch();
        stopwatch.start();

        for (Collection<Long> idBatch : new BatchIterator<>(ids, GeneDaoImpl.BATCH_SIZE)) {
            Collection<Gene> loaded = this.doLoadThawedLite(idBatch);
            thawedGenes.addAll(loaded);
        }

        // Only mention loads that took longer than a second.
        if (stopwatch.getTime() > 1000) {
            AbstractDao.log.debug(
                    "Load+thawRawAndProcessed " + thawedGenes.size() + " genes: " + stopwatch.getTime() + "ms");
        }
    }

    return thawedGenes;
}
Also used : Gene(ubic.gemma.model.genome.Gene) BatchIterator(ubic.basecode.util.BatchIterator) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 4 with BatchIterator

use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.

the class GeneDaoImpl method loadThawedLiter.

/**
 * Loads the genes with the given ids, working through the ids in batches of {@code GeneDaoImpl.BATCH_SIZE} and
 * delegating each batch to {@code doLoadThawedLiter}.
 *
 * @param ids ids of the genes to load
 * @return the genes collected from all batches; an empty collection when {@code ids} is empty
 */
@Override
public Collection<Gene> loadThawedLiter(Collection<Long> ids) {
    Collection<Gene> thawedGenes = new HashSet<>();

    if (!ids.isEmpty()) {
        StopWatch stopwatch = new StopWatch();
        stopwatch.start();

        for (Collection<Long> idBatch : new BatchIterator<>(ids, GeneDaoImpl.BATCH_SIZE)) {
            Collection<Gene> loaded = this.doLoadThawedLiter(idBatch);
            thawedGenes.addAll(loaded);
        }

        // Only mention loads that took longer than a second.
        if (stopwatch.getTime() > 1000) {
            AbstractDao.log.debug(
                    "Load+thawRawAndProcessed " + thawedGenes.size() + " genes: " + stopwatch.getTime() + "ms");
        }
    }

    return thawedGenes;
}
Also used : Gene(ubic.gemma.model.genome.Gene) BatchIterator(ubic.basecode.util.BatchIterator) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 5 with BatchIterator

use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.

the class CoexpressionDaoImpl method updatedTestedIn.

/**
 * Mark the genes as being tested for coexpression in the data set and persist the information in the database. This
 * is run at the tail end of coexpression analysis for the data set.
 * <p>
 * Works in two passes: first, existing {@code GeneCoexpressionTestedIn} rows for the genes are fetched in batches
 * of 512 ids and updated with the data set's id; second, a new row is saved for every gene id for which no existing
 * row was found. In both passes the session is flushed and cleared after every 256 entities.
 *
 * @param ee          the data set
 * @param genesTested the genes
 */
private void updatedTestedIn(BioAssaySet ee, Collection<Gene> genesTested) {
    Session session = this.getSessionFactory().getCurrentSession();
    Query existingRowsQuery = session.createQuery("from GeneCoexpressionTestedIn where geneId in (:ids)");

    Set<Long> genesWithExistingRow = new HashSet<>();
    Collection<Long> geneids = EntityUtils.getIds(genesTested);

    // Pass 1: update the rows that already exist.
    BatchIterator<Long> idBatches = new BatchIterator<>(geneids, 512);
    while (idBatches.hasNext()) {
        existingRowsQuery.setParameterList("ids", idBatches.next());
        List<GeneCoexpressionTestedIn> existingRows = existingRowsQuery.list();

        // The counter restarts with every batch of ids.
        int updatedInBatch = 0;
        for (GeneCoexpressionTestedIn row : existingRows) {
            row.addEntity(ee.getId());
            session.update(row);
            assert row.isIncluded(ee.getId());
            genesWithExistingRow.add(row.getGeneId());

            updatedInBatch++;
            if (updatedInBatch % 256 == 0) {
                session.flush();
                session.clear();
            }
        }
    }

    if (!genesWithExistingRow.isEmpty()) {
        CoexpressionDaoImpl.log
                .info("Updated tested-in information for " + genesWithExistingRow.size() + " genes");
        // TODO do it just for the genes changed.
        this.geneTestedInCache.clearCache();
    }

    session.flush();
    session.clear();

    // Pass 2: whatever ids remain have no row at all yet.
    geneids.removeAll(genesWithExistingRow);
    if (geneids.isEmpty()) {
        return;
    }

    CoexpressionDaoImpl.log.info("Adding tested-in information for " + geneids.size() + " genes");

    int savedSoFar = 0;
    for (Long geneId : geneids) {
        GeneCoexpressionTestedIn newRow = new GeneCoexpressionTestedIn(geneId);
        newRow.addEntity(ee.getId());
        assert newRow.isIncluded(ee.getId());
        assert newRow.getNumIds() == 1;
        session.save(newRow);

        savedSoFar++;
        if (savedSoFar % 256 == 0) {
            session.flush();
            session.clear();
        }
    }
}
Also used : GeneCoexpressionTestedIn(ubic.gemma.model.analysis.expression.coexpression.GeneCoexpressionTestedIn) BatchIterator(ubic.basecode.util.BatchIterator)

Aggregations

BatchIterator (ubic.basecode.util.BatchIterator): 13; StopWatch (org.apache.commons.lang3.time.StopWatch): 9; Gene (ubic.gemma.model.genome.Gene): 5; org.hibernate (org.hibernate): 3; Collection (java.util.Collection): 2; HashMap (java.util.HashMap): 2; GeneValueObject (ubic.gemma.model.genome.gene.GeneValueObject): 2; BigInteger (java.math.BigInteger): 1; GeneCoexpressionTestedIn (ubic.gemma.model.analysis.expression.coexpression.GeneCoexpressionTestedIn): 1; Characteristic (ubic.gemma.model.common.description.Characteristic): 1; VocabCharacteristic (ubic.gemma.model.common.description.VocabCharacteristic): 1; ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign): 1; BioAssayValueObject (ubic.gemma.model.expression.bioAssay.BioAssayValueObject): 1; CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence): 1; CompositeSequenceValueObject (ubic.gemma.model.expression.designElement.CompositeSequenceValueObject): 1; ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment): 1; ExpressionExperimentValueObject (ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject): 1; CharacteristicValueObject (ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject): 1; TaskCancelledException (ubic.gemma.persistence.util.TaskCancelledException): 1