Search in sources :

Example 1 with TaskCancelledException

use of ubic.gemma.persistence.util.TaskCancelledException in project Gemma by PavlidisLab.

the class DifferentialExpressionResultDaoImpl method findDiffExAnalysisResultIdsInResultSets.

/**
 * Collects differential expression results for the given genes across the given result sets.
 * <p>
 * Whatever {@code fillFromCache} can supply is placed in the result first. The result sets that
 * {@code stripUnneededResultSets} reports as still needed are then queried from the database in nested
 * batches (result sets in the outer loop, probes in the inner loop). The rows fetched are handed to
 * {@code addToCache} and merged into the returned map.
 *
 * @param resultSets summaries of the result sets to search; they are indexed here by {@code getResultSetId}
 * @param geneIds    IDs of the genes of interest
 * @return map keyed by result set ID; the inner map is presumably keyed by gene ID (NOTE(review): confirm
 *         against {@code processResultTuple})
 * @throws TaskCancelledException if the current thread is found to be interrupted after a probe batch or a
 *                                result set batch has been processed
 */
@Override
public Map<Long, Map<Long, DiffExprGeneSearchResult>> findDiffExAnalysisResultIdsInResultSets(Collection<DiffExResultSetSummaryValueObject> resultSets, Collection<Long> geneIds) {
    Map<Long, Map<Long, DiffExprGeneSearchResult>> results = new HashMap<>();
    Session session = this.getSessionFactory().getCurrentSession();
    Map<Long, DiffExResultSetSummaryValueObject> resultSetIdsMap = EntityUtils.getIdMap(resultSets, "getResultSetId");
    Map<Long, Collection<Long>> foundInCache = this.fillFromCache(results, resultSetIdsMap.keySet(), geneIds);
    if (!foundInCache.isEmpty()) {
        AbstractDao.log.info("Results for " + foundInCache.size() + " resultsets found in cache");
    } else {
        AbstractDao.log.info("No results were in the cache");
    }
    Collection<Long> resultSetsNeeded = this.stripUnneededResultSets(foundInCache, resultSetIdsMap.keySet(), geneIds);
    // Are we finished?
    if (resultSetsNeeded.isEmpty()) {
        AbstractDao.log.info("All results were in the cache.");
        return results;
    }
    AbstractDao.log.info(foundInCache.size() + "/" + resultSetIdsMap.size() + " resultsSets had at least some cached results; still need to query " + resultSetsNeeded.size());
    org.hibernate.SQLQuery queryObject = session.createSQLQuery(DifferentialExpressionResultDaoImpl.fetchBatchDifferentialExpressionAnalysisResultsByResultSetsAndGeneQuery);
    /*
     * These values have been tweaked to probe for performance issues.
     */
    int resultSetBatchSize = 50;
    int geneBatchSize = 100;
    if (resultSetsNeeded.size() > geneIds.size()) {
        resultSetBatchSize = Math.min(500, resultSetsNeeded.size());
        AbstractDao.log.info("Batching by result sets (" + resultSetsNeeded.size() + " resultSets); " + geneIds.size() + " genes; batch size=" + resultSetBatchSize);
    } else {
        geneBatchSize = Math.min(200, geneIds.size());
        AbstractDao.log.info("Batching by genes (" + geneIds.size() + " genes); " + resultSetsNeeded.size() + " resultSets; batch size=" + geneBatchSize);
    }
    // Divide as double: an int/int division truncates before Math.ceil sees it, under-reporting the batch count.
    final int numResultSetBatches = (int) Math.ceil((double) resultSetsNeeded.size() / resultSetBatchSize);
    queryObject.setFlushMode(FlushMode.MANUAL);
    StopWatch timer = new StopWatch();
    timer.start();
    int numResults = 0;
    long timeForFillingNonSig = 0;
    Map<Long, Map<Long, DiffExprGeneSearchResult>> resultsFromDb = new HashMap<>();
    int numResultSetBatchesDone = 0;
    // Iterate over batches of resultSets
    for (Collection<Long> resultSetIdBatch : new BatchIterator<>(resultSetsNeeded, resultSetBatchSize)) {
        if (AbstractDao.log.isDebugEnabled()) {
            AbstractDao.log.debug("Starting batch of resultsets: " + StringUtils.abbreviate(StringUtils.join(resultSetIdBatch, ","), 100));
        }
        /*
         * Get the probes using the CommonQueries gene2cs. Otherwise we (in effect) end up doing this over and
         * over again.
         */
        Map<Long, Collection<Long>> cs2GeneIdMap = this.getProbesForGenesInResultSetBatch(session, geneIds, resultSetIdsMap, resultSetIdBatch);
        queryObject.setParameterList("rs_ids", resultSetIdBatch);
        int numGeneBatchesDone = 0;
        // Same double-division fix as for numResultSetBatches; only used for progress logging.
        final int numGeneBatches = (int) Math.ceil((double) cs2GeneIdMap.size() / geneBatchSize);
        StopWatch innerQt = new StopWatch();
        // Iterate over batches of probes (genes)
        for (Collection<Long> probeBatch : new BatchIterator<>(cs2GeneIdMap.keySet(), geneBatchSize)) {
            if (AbstractDao.log.isDebugEnabled()) {
                AbstractDao.log.debug("Starting batch of probes: " + StringUtils.abbreviate(StringUtils.join(probeBatch, ","), 100));
            }
            // The probe IDs are bound in sorted order (open question: does sorting actually help the query?).
            List<Long> pbL = new Vector<>(probeBatch);
            Collections.sort(pbL);
            queryObject.setParameterList("probe_ids", pbL);
            innerQt.start();
            List<?> queryResult = queryObject.list();
            innerQt.stop();
            if (innerQt.getTime() > 2000) {
                // Slow query: show the actual query with its parameters substituted in.
                AbstractDao.log.info("Query time: " + innerQt.getTime() + "ms:\n " + queryObject.getQueryString().replace(":probe_ids", StringUtils.join(probeBatch, ",")).replace(":rs_ids", StringUtils.join(resultSetIdBatch, ",")));
            }
            innerQt.reset();
            /*
             * Each query tuple holds the probe, result, resultSet, qvalue and pvalue.
             */
            for (Object o : queryResult) {
                numResults += this.processResultTuple(o, resultsFromDb, cs2GeneIdMap);
            }
            // Emit a progress line at most roughly every 5 seconds, restarting the timer each time.
            if (timer.getTime() > 5000 && AbstractDao.log.isInfoEnabled()) {
                AbstractDao.log.info("Batch time: " + timer.getTime() + "ms; Fetched DiffEx " + numResults + " results so far. " + numResultSetBatchesDone + "/" + numResultSetBatches + " resultset batches completed. " + numGeneBatchesDone + "/" + numGeneBatches + " gene batches done.");
                timer.reset();
                timer.start();
            }
            // Check if task was cancelled.
            if (Thread.currentThread().isInterrupted()) {
                throw new TaskCancelledException("Search was cancelled");
            }
            numGeneBatchesDone++;
            if (DifferentialExpressionResultDaoImpl.CORRECTED_PVALUE_THRESHOLD_TO_BE_CONSIDERED_DIFF_EX < 1.0) {
                timeForFillingNonSig += this.fillNonSignificant(pbL, resultSetIdsMap, resultsFromDb, resultSetIdBatch, cs2GeneIdMap, session);
            }
        }
        // Check if task was cancelled.
        if (Thread.currentThread().isInterrupted()) {
            throw new TaskCancelledException("Search was cancelled");
        }
        numResultSetBatchesDone++;
    }
    if (timer.getTime() > 1000 && AbstractDao.log.isInfoEnabled()) {
        AbstractDao.log.info("Fetching DiffEx from DB took total of " + timer.getTime() + " ms : geneIds=" + StringUtils.abbreviate(StringUtils.join(geneIds, ","), 50) + " result set=" + StringUtils.abbreviate(StringUtils.join(resultSetsNeeded, ","), 50));
        if (timeForFillingNonSig > 100) {
            AbstractDao.log.info("Filling in non-significant values: " + timeForFillingNonSig + "ms in total");
        }
    }
    // Add the DB results to the cached results.
    this.addToCache(resultsFromDb, resultSetsNeeded, geneIds);
    // Merge the DB results into whatever was already filled from the cache.
    for (Map.Entry<Long, Map<Long, DiffExprGeneSearchResult>> dbEntry : resultsFromDb.entrySet()) {
        Long resultSetId = dbEntry.getKey();
        Map<Long, DiffExprGeneSearchResult> geneResults = dbEntry.getValue();
        if (results.containsKey(resultSetId)) {
            results.get(resultSetId).putAll(geneResults);
        } else {
            results.put(resultSetId, geneResults);
        }
    }
    return results;
}
Also used : BatchIterator(ubic.basecode.util.BatchIterator) org.hibernate(org.hibernate) StopWatch(org.apache.commons.lang3.time.StopWatch) GeneValueObject(ubic.gemma.model.genome.gene.GeneValueObject) TaskCancelledException(ubic.gemma.persistence.util.TaskCancelledException)

Aggregations

StopWatch (org.apache.commons.lang3.time.StopWatch)1 org.hibernate (org.hibernate)1 BatchIterator (ubic.basecode.util.BatchIterator)1 GeneValueObject (ubic.gemma.model.genome.gene.GeneValueObject)1 TaskCancelledException (ubic.gemma.persistence.util.TaskCancelledException)1