Search in sources :

Example 6 with BatchIterator

use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.

the class GeneDaoImpl method findByOfficialSymbols.

/**
 * Finds genes by official symbol within a single taxon, querying the database in batches of
 * {@code GeneDaoImpl.BATCH_SIZE} symbols.
 *
 * @param query   official symbols to look up
 * @param taxonId id of the taxon the genes must belong to
 * @return map from the lower-cased official symbol to the gene found; if several fetched genes yield the
 *         same lower-cased symbol, the one fetched last replaces the earlier ones
 */
@Override
public Map<String, Gene> findByOfficialSymbols(Collection<String> query, Long taxonId) {
    // language=HQL
    final String hql = "select g from Gene as g join fetch g.taxon t where g.officialSymbol in (:symbols) and t.id = :taxonId";
    Map<String, Gene> genesBySymbol = new HashMap<>();
    BatchIterator<String> symbolBatches = new BatchIterator<>(query, GeneDaoImpl.BATCH_SIZE);
    for (Collection<String> symbolBatch : symbolBatches) {
        String[] paramNames = new String[] { "symbols", "taxonId" };
        Object[] paramValues = new Object[] { symbolBatch, taxonId };
        // noinspection unchecked
        List<Gene> fetched = this.getHibernateTemplate().findByNamedParam(hql, paramNames, paramValues);
        for (Gene gene : fetched) {
            // keys are lower-cased so callers can look symbols up without regard to case
            String key = gene.getOfficialSymbol().toLowerCase();
            genesBySymbol.put(key, gene);
        }
    }
    return genesBySymbol;
}
Also used : Gene(ubic.gemma.model.genome.Gene) BatchIterator(ubic.basecode.util.BatchIterator)

Example 7 with BatchIterator

use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.

the class GeneDaoImpl method findByNcbiIds.

/**
 * Finds genes by NCBI gene id, querying the database in batches of {@code GeneDaoImpl.BATCH_SIZE} ids.
 *
 * @param ncbiIds NCBI gene ids to look up
 * @return map from NCBI gene id to the gene found; ids with no matching gene have no entry
 */
@Override
public Map<Integer, Gene> findByNcbiIds(Collection<Integer> ncbiIds) {
    // language=HQL
    final String hql = "from Gene g where g.ncbiGeneId in (:ncbi)";
    Map<Integer, Gene> genesByNcbiId = new HashMap<>();
    BatchIterator<Integer> idBatches = new BatchIterator<>(ncbiIds, GeneDaoImpl.BATCH_SIZE);
    for (Collection<Integer> idBatch : idBatches) {
        // noinspection unchecked
        List<Gene> fetched = this.getHibernateTemplate().findByNamedParam(hql, "ncbi", idBatch);
        for (Gene gene : fetched) {
            Integer ncbiId = gene.getNcbiGeneId();
            genesByNcbiId.put(ncbiId, gene);
        }
    }
    return genesByNcbiId;
}
Also used : Gene(ubic.gemma.model.genome.Gene) BatchIterator(ubic.basecode.util.BatchIterator)

Example 8 with BatchIterator

use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorDaoImpl method getRanks.

/**
 * Collects expression ranks for the given genes in the given experiments.
 * <p>
 * The genes are first mapped to composite sequences (probes) on the array designs used by the experiments;
 * the processed vectors for those probes are then queried in batches of 500 probes. For each row the
 * rank-by-mean and rank-by-max values are handed to {@code addToGene} together with the genes mapped to
 * the probe (presumably {@code method} selects which of the two ranks is recorded -- confirm against
 * {@code addToGene}).
 *
 * @param expressionExperiments experiments to restrict the search to
 * @param genes                 genes whose ranks are wanted
 * @param method                rank method, passed through to {@code addToGene}
 * @return map of experiment to (gene to collected rank values); empty if no composite sequences were
 *         found for the genes
 */
@Override
public Map<ExpressionExperiment, Map<Gene, Collection<Double>>> getRanks(Collection<ExpressionExperiment> expressionExperiments, Collection<Gene> genes, RankMethod method) {
    Collection<ArrayDesign> arrayDesigns = CommonQueries.getArrayDesignsUsed(EntityUtils.getIds(expressionExperiments), this.getSessionFactory().getCurrentSession()).keySet();
    // this could be further improved by getting probes specific to experiments in batches.
    Map<CompositeSequence, Collection<Gene>> cs2gene = CommonQueries.getCs2GeneMap(genes, arrayDesigns, this.getSessionFactory().getCurrentSession());
    if (cs2gene.isEmpty()) {
        AbstractDao.log.warn("No composite sequences found for genes");
        return new HashMap<>();
    }
    // The query text does not depend on the batch, so it is built once outside the loop.
    // language=HQL
    final String queryString = "select distinct dedv.expressionExperiment, dedv.designElement, dedv.rankByMean, " + "dedv.rankByMax from ProcessedExpressionDataVector dedv " + " where dedv.designElement in ( :cs ) and dedv.expressionExperiment in (:ees) ";
    Map<ExpressionExperiment, Map<Gene, Collection<Double>>> result = new HashMap<>();
    BatchIterator<CompositeSequence> batchIterator = new BatchIterator<>(cs2gene.keySet(), 500);
    for (Collection<CompositeSequence> batch : batchIterator) {
        // "cs" is a collection bound to an IN clause, so it must go through setParameterList (as "ees"
        // does); setParameter would try to bind the whole collection as a single value.
        List<?> qr = this.getSessionFactory().getCurrentSession().createQuery(queryString).setParameterList("cs", batch).setParameterList("ees", expressionExperiments).list();
        for (Object o : qr) {
            // each row: experiment, design element, rankByMean, rankByMax
            Object[] oa = (Object[]) o;
            ExpressionExperiment e = (ExpressionExperiment) oa[0];
            CompositeSequence d = (CompositeSequence) oa[1];
            // missing ranks are represented as NaN
            Double rMean = oa[2] == null ? Double.NaN : (Double) oa[2];
            Double rMax = oa[3] == null ? Double.NaN : (Double) oa[3];
            if (!result.containsKey(e)) {
                result.put(e, new HashMap<Gene, Collection<Double>>());
            }
            Map<Gene, Collection<Double>> rMap = result.get(e);
            Collection<Gene> genes4probe = cs2gene.get(d);
            this.addToGene(method, rMap, rMean, rMax, genes4probe);
        }
    }
    return result;
}
Also used : ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) BatchIterator(ubic.basecode.util.BatchIterator) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) Gene(ubic.gemma.model.genome.Gene) BioAssayValueObject(ubic.gemma.model.expression.bioAssay.BioAssayValueObject) ExpressionExperimentValueObject(ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject) CompositeSequenceValueObject(ubic.gemma.model.expression.designElement.CompositeSequenceValueObject)

Example 9 with BatchIterator

use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.

the class DifferentialExpressionResultDaoImpl method load.

/**
 * Loads differential expression analysis results by id, fetching them in batches of 1000 ids.
 * A batch that takes longer than one second is reported at info level together with the query.
 *
 * @param ids ids of the results to load
 * @return the loaded results as a set; empty if {@code ids} is empty
 */
@Override
public Collection<DifferentialExpressionAnalysisResult> load(Collection<Long> ids) {
    Collection<DifferentialExpressionAnalysisResult> loaded = new HashSet<>();
    if (ids.isEmpty()) {
        return loaded;
    }
    // language=HQL
    final String hql = "from DifferentialExpressionAnalysisResultImpl dea where dea.id in (:ids)";
    // previously: 500.
    final int batchSize = 1000;
    BatchIterator<Long> idBatches = new BatchIterator<>(ids, batchSize);
    for (Collection<Long> idBatch : idBatches) {
        StopWatch batchTimer = new StopWatch();
        batchTimer.start();
        // noinspection unchecked
        List<DifferentialExpressionAnalysisResult> fetched = this.getSessionFactory().getCurrentSession().createQuery(hql).setParameterList("ids", idBatch).list();
        loaded.addAll(fetched);
        // only slow batches are worth logging
        if (batchTimer.getTime() > 1000) {
            AbstractDao.log.info("Fetch " + idBatch.size() + "/" + ids.size() + " results with contrasts: " + batchTimer.getTime() + "ms; query was\n " + hql);
        }
    }
    return loaded;
}
Also used : BatchIterator(ubic.basecode.util.BatchIterator) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 10 with BatchIterator

use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.

the class DifferentialExpressionResultDaoImpl method findDiffExAnalysisResultIdsInResultSets.

/**
 * Finds differential expression results for the given genes in the given result sets, consulting the
 * cache first and querying the database (in nested batches of result sets and probes) for whatever is
 * still needed. Results fetched from the database are added to the cache before being merged into the
 * returned map.
 * <p>
 * The thread's interrupt flag is checked after every probe batch and every result set batch so that a
 * long-running search can be cancelled.
 *
 * @param resultSets result sets to search in
 * @param geneIds    ids of the genes of interest
 * @return map keyed by result set id; the inner map is keyed by a Long id (presumably the gene id --
 *         confirm against {@code processResultTuple})
 * @throws TaskCancelledException if the current thread is interrupted while batches are being processed
 */
@Override
public Map<Long, Map<Long, DiffExprGeneSearchResult>> findDiffExAnalysisResultIdsInResultSets(Collection<DiffExResultSetSummaryValueObject> resultSets, Collection<Long> geneIds) {
    Map<Long, Map<Long, DiffExprGeneSearchResult>> results = new HashMap<>();
    Session session = this.getSessionFactory().getCurrentSession();
    Map<Long, DiffExResultSetSummaryValueObject> resultSetIdsMap = EntityUtils.getIdMap(resultSets, "getResultSetId");
    // presumably fills 'results' with whatever is already cached -- see fillFromCache
    Map<Long, Collection<Long>> foundInCache = this.fillFromCache(results, resultSetIdsMap.keySet(), geneIds);
    if (!foundInCache.isEmpty()) {
        AbstractDao.log.info("Results for " + foundInCache.size() + " resultsets found in cache");
    } else {
        AbstractDao.log.info("No results were in the cache");
    }
    Collection<Long> resultSetsNeeded = this.stripUnneededResultSets(foundInCache, resultSetIdsMap.keySet(), geneIds);
    // Are we finished?
    if (resultSetsNeeded.isEmpty()) {
        AbstractDao.log.info("All results were in the cache.");
        return results;
    }
    AbstractDao.log.info(foundInCache.size() + "/" + resultSetIdsMap.size() + " resultsSets had at least some cached results; still need to query " + resultSetsNeeded.size());
    assert !resultSetsNeeded.isEmpty();
    org.hibernate.SQLQuery queryObject = session.createSQLQuery(DifferentialExpressionResultDaoImpl.fetchBatchDifferentialExpressionAnalysisResultsByResultSetsAndGeneQuery);
    /*
     * These values have been tweaked to probe for performance issues.
     */
    int resultSetBatchSize = 50;
    int geneBatchSize = 100;
    // Enlarge the batch size along whichever dimension (result sets or genes) is the larger one.
    if (resultSetsNeeded.size() > geneIds.size()) {
        resultSetBatchSize = Math.min(500, resultSetsNeeded.size());
        AbstractDao.log.info("Batching by result sets (" + resultSetsNeeded.size() + " resultSets); " + geneIds.size() + " genes; batch size=" + resultSetBatchSize);
    } else {
        geneBatchSize = Math.min(200, geneIds.size());
        AbstractDao.log.info("Batching by genes (" + geneIds.size() + " genes); " + resultSetsNeeded.size() + " resultSets; batch size=" + geneBatchSize);
    }
    // Cast to double before dividing: int / int truncates, which would make Math.ceil a no-op and
    // under-report the batch count in the progress log. Both batch sizes are >= 1 at this point.
    final int numResultSetBatches = (int) Math.ceil((double) resultSetsNeeded.size() / resultSetBatchSize);
    queryObject.setFlushMode(FlushMode.MANUAL);
    StopWatch timer = new StopWatch();
    timer.start();
    int numResults = 0;
    long timeForFillingNonSig = 0;
    Map<Long, Map<Long, DiffExprGeneSearchResult>> resultsFromDb = new HashMap<>();
    int numResultSetBatchesDone = 0;
    // Iterate over batches of resultSets
    for (Collection<Long> resultSetIdBatch : new BatchIterator<>(resultSetsNeeded, resultSetBatchSize)) {
        if (AbstractDao.log.isDebugEnabled())
            AbstractDao.log.debug("Starting batch of resultsets: " + StringUtils.abbreviate(StringUtils.join(resultSetIdBatch, ","), 100));
        /*
         * Get the probes using the CommonQueries gene2cs. Otherwise we (in effect) end up doing this over and over
         * again.
         */
        Map<Long, Collection<Long>> cs2GeneIdMap = this.getProbesForGenesInResultSetBatch(session, geneIds, resultSetIdsMap, resultSetIdBatch);
        queryObject.setParameterList("rs_ids", resultSetIdBatch);
        int numGeneBatchesDone = 0;
        // Same integer-division fix as for numResultSetBatches above.
        final int numGeneBatches = (int) Math.ceil((double) cs2GeneIdMap.size() / geneBatchSize);
        StopWatch innerQt = new StopWatch();
        // iterate over batches of probes (genes)
        for (Collection<Long> probeBatch : new BatchIterator<>(cs2GeneIdMap.keySet(), geneBatchSize)) {
            if (AbstractDao.log.isDebugEnabled())
                AbstractDao.log.debug("Starting batch of probes: " + StringUtils.abbreviate(StringUtils.join(probeBatch, ","), 100));
            // would it help to sort the probeBatch?
            List<Long> pbL = new Vector<>(probeBatch);
            Collections.sort(pbL);
            queryObject.setParameterList("probe_ids", pbL);
            innerQt.start();
            List<?> queryResult = queryObject.list();
            innerQt.stop();
            if (innerQt.getTime() > 2000) {
                // show the actual query with params.
                AbstractDao.log.info("Query time: " + innerQt.getTime() + "ms:\n " + queryObject.getQueryString().replace(":probe_ids", StringUtils.join(probeBatch, ",")).replace(":rs_ids", StringUtils.join(resultSetIdBatch, ",")));
            }
            // reset so the same stopwatch can be started again for the next probe batch
            innerQt.reset();
            /*
             * Each query tuple are the probe, result, resultsSet, qvalue, pvalue.
             */
            for (Object o : queryResult) {
                // Long resultSetId = ( ( BigInteger )((Object[])o)[2] ).longValue();
                // if (!resultSetId.equals)
                numResults += this.processResultTuple(o, resultsFromDb, cs2GeneIdMap);
            }
            // Progress report at most roughly every five seconds; the timer is restarted afterwards.
            if (timer.getTime() > 5000 && AbstractDao.log.isInfoEnabled()) {
                AbstractDao.log.info("Batch time: " + timer.getTime() + "ms; Fetched DiffEx " + numResults + " results so far. " + numResultSetBatchesDone + "/" + numResultSetBatches + " resultset batches completed. " + numGeneBatchesDone + "/" + numGeneBatches + " gene batches done.");
                timer.reset();
                timer.start();
            }
            // Check if task was cancelled.
            if (Thread.currentThread().isInterrupted()) {
                throw new TaskCancelledException("Search was cancelled");
            }
            numGeneBatchesDone++;
            if (DifferentialExpressionResultDaoImpl.CORRECTED_PVALUE_THRESHOLD_TO_BE_CONSIDERED_DIFF_EX < 1.0) {
                timeForFillingNonSig += this.fillNonSignificant(pbL, resultSetIdsMap, resultsFromDb, resultSetIdBatch, cs2GeneIdMap, session);
            }
        }
        // Check if task was cancelled.
        if (Thread.currentThread().isInterrupted()) {
            throw new TaskCancelledException("Search was cancelled");
        }
        numResultSetBatchesDone++;
    }
    // NOTE(review): the timer is reset whenever a progress message is logged above, so the "total"
    // reported here only covers the time since the last reset.
    if (timer.getTime() > 1000 && AbstractDao.log.isInfoEnabled()) {
        AbstractDao.log.info("Fetching DiffEx from DB took total of " + timer.getTime() + " ms : geneIds=" + StringUtils.abbreviate(StringUtils.join(geneIds, ","), 50) + " result set=" + StringUtils.abbreviate(StringUtils.join(resultSetsNeeded, ","), 50));
        if (timeForFillingNonSig > 100) {
            AbstractDao.log.info("Filling in non-significant values: " + timeForFillingNonSig + "ms in total");
        }
    }
    // Add the DB results to the cached results.
    this.addToCache(resultsFromDb, resultSetsNeeded, geneIds);
    // Merge the DB results into the results that came from the cache.
    for (Long resultSetId : resultsFromDb.keySet()) {
        Map<Long, DiffExprGeneSearchResult> geneResults = resultsFromDb.get(resultSetId);
        if (results.containsKey(resultSetId)) {
            results.get(resultSetId).putAll(geneResults);
        } else {
            results.put(resultSetId, geneResults);
        }
    }
    return results;
}
Also used : BatchIterator(ubic.basecode.util.BatchIterator) org.hibernate(org.hibernate) StopWatch(org.apache.commons.lang3.time.StopWatch) GeneValueObject(ubic.gemma.model.genome.gene.GeneValueObject) TaskCancelledException(ubic.gemma.persistence.util.TaskCancelledException)

Aggregations

BatchIterator (ubic.basecode.util.BatchIterator)13 StopWatch (org.apache.commons.lang3.time.StopWatch)9 Gene (ubic.gemma.model.genome.Gene)5 org.hibernate (org.hibernate)3 Collection (java.util.Collection)2 HashMap (java.util.HashMap)2 GeneValueObject (ubic.gemma.model.genome.gene.GeneValueObject)2 BigInteger (java.math.BigInteger)1 GeneCoexpressionTestedIn (ubic.gemma.model.analysis.expression.coexpression.GeneCoexpressionTestedIn)1 Characteristic (ubic.gemma.model.common.description.Characteristic)1 VocabCharacteristic (ubic.gemma.model.common.description.VocabCharacteristic)1 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)1 BioAssayValueObject (ubic.gemma.model.expression.bioAssay.BioAssayValueObject)1 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)1 CompositeSequenceValueObject (ubic.gemma.model.expression.designElement.CompositeSequenceValueObject)1 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)1 ExpressionExperimentValueObject (ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject)1 CharacteristicValueObject (ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject)1 TaskCancelledException (ubic.gemma.persistence.util.TaskCancelledException)1