Use of ubic.gemma.persistence.util.TaskCancelledException in project Gemma by PavlidisLab.
From the class DifferentialExpressionResultDaoImpl, method findDiffExAnalysisResultIdsInResultSets.
/**
 * Looks up differential expression results for the given genes within the given result sets.
 * <p>
 * The cache is consulted first; only result sets that still need data are queried from the database. The
 * database query is run in nested batches: an outer loop over batches of result set ids and an inner loop over
 * batches of probe ids (the probes are resolved once per result set batch). Everything fetched from the database
 * is added to the cache and merged with the cached results before returning.
 *
 * @param resultSets summaries of the result sets to search; they are indexed by {@code getResultSetId}
 * @param geneIds    ids of the genes of interest
 * @return map keyed by result set id; each value maps an id (presumably the gene id - confirm against
 *         processResultTuple) to its search result
 * @throws TaskCancelledException if the current thread is found to be interrupted between batches
 */
@Override
public Map<Long, Map<Long, DiffExprGeneSearchResult>> findDiffExAnalysisResultIdsInResultSets(Collection<DiffExResultSetSummaryValueObject> resultSets, Collection<Long> geneIds) {
    Map<Long, Map<Long, DiffExprGeneSearchResult>> results = new HashMap<>();
    Session session = this.getSessionFactory().getCurrentSession();
    Map<Long, DiffExResultSetSummaryValueObject> resultSetIdsMap = EntityUtils.getIdMap(resultSets, "getResultSetId");

    // Populate 'results' with whatever is already cached.
    Map<Long, Collection<Long>> foundInCache = this.fillFromCache(results, resultSetIdsMap.keySet(), geneIds);
    if (!foundInCache.isEmpty()) {
        AbstractDao.log.info("Results for " + foundInCache.size() + " resultsets found in cache");
    } else {
        AbstractDao.log.info("No results were in the cache");
    }

    Collection<Long> resultSetsNeeded = this.stripUnneededResultSets(foundInCache, resultSetIdsMap.keySet(), geneIds);

    // Are we finished?
    if (resultSetsNeeded.isEmpty()) {
        AbstractDao.log.info("All results were in the cache.");
        return results;
    }

    AbstractDao.log.info(foundInCache.size() + "/" + resultSetIdsMap.size() + " resultsSets had at least some cached results; still need to query " + resultSetsNeeded.size());
    assert !resultSetsNeeded.isEmpty();

    org.hibernate.SQLQuery queryObject = session.createSQLQuery(DifferentialExpressionResultDaoImpl.fetchBatchDifferentialExpressionAnalysisResultsByResultSetsAndGeneQuery);

    /*
     * These values have been tweaked to probe for performance issues.
     */
    int resultSetBatchSize = 50;
    int geneBatchSize = 100;

    // Enlarge the batch size along whichever dimension is bigger. Both sizes stay >= 1 here because
    // resultSetsNeeded is known to be non-empty at this point.
    if (resultSetsNeeded.size() > geneIds.size()) {
        resultSetBatchSize = Math.min(500, resultSetsNeeded.size());
        AbstractDao.log.info("Batching by result sets (" + resultSetsNeeded.size() + " resultSets); " + geneIds.size() + " genes; batch size=" + resultSetBatchSize);
    } else {
        geneBatchSize = Math.min(200, geneIds.size());
        AbstractDao.log.info("Batching by genes (" + geneIds.size() + " genes); " + resultSetsNeeded.size() + " resultSets; batch size=" + geneBatchSize);
    }

    // Integer ceiling division: Math.ceil(a / b) on two ints truncates before the ceiling is applied.
    final int numResultSetBatches = (resultSetsNeeded.size() + resultSetBatchSize - 1) / resultSetBatchSize;

    queryObject.setFlushMode(FlushMode.MANUAL);

    StopWatch timer = new StopWatch();
    timer.start();
    int numResults = 0;
    long timeForFillingNonSig = 0;

    Map<Long, Map<Long, DiffExprGeneSearchResult>> resultsFromDb = new HashMap<>();

    int numResultSetBatchesDone = 0;

    // Iterate over batches of resultSets
    for (Collection<Long> resultSetIdBatch : new BatchIterator<>(resultSetsNeeded, resultSetBatchSize)) {

        if (AbstractDao.log.isDebugEnabled()) {
            AbstractDao.log.debug("Starting batch of resultsets: " + StringUtils.abbreviate(StringUtils.join(resultSetIdBatch, ","), 100));
        }

        /*
         * Get the probes using the CommonQueries gene2cs. Otherwise we (in effect) end up doing this over and over
         * again.
         */
        Map<Long, Collection<Long>> cs2GeneIdMap = this.getProbesForGenesInResultSetBatch(session, geneIds, resultSetIdsMap, resultSetIdBatch);

        queryObject.setParameterList("rs_ids", resultSetIdBatch);

        int numGeneBatchesDone = 0;
        // Integer ceiling division (see above); yields 0 when no probes were found for this batch.
        final int numGeneBatches = (cs2GeneIdMap.size() + geneBatchSize - 1) / geneBatchSize;

        StopWatch innerQt = new StopWatch();

        // iterate over batches of probes (genes)
        for (Collection<Long> probeBatch : new BatchIterator<>(cs2GeneIdMap.keySet(), geneBatchSize)) {

            if (AbstractDao.log.isDebugEnabled()) {
                AbstractDao.log.debug("Starting batch of probes: " + StringUtils.abbreviate(StringUtils.join(probeBatch, ","), 100));
            }

            // Bind the probe ids in sorted order (the original author wondered whether sorting helps the query).
            List<Long> pbL = new java.util.ArrayList<>(probeBatch);
            Collections.sort(pbL);

            queryObject.setParameterList("probe_ids", pbL);

            innerQt.start();
            List<?> queryResult = queryObject.list();
            innerQt.stop();

            if (innerQt.getTime() > 2000) {
                // show the actual query with params, in the order they were bound.
                AbstractDao.log.info("Query time: " + innerQt.getTime() + "ms:\n " + queryObject.getQueryString().replace(":probe_ids", StringUtils.join(pbL, ",")).replace(":rs_ids", StringUtils.join(resultSetIdBatch, ",")));
            }
            innerQt.reset();

            /*
             * Each query tuple are the probe, result, resultsSet, qvalue, pvalue.
             */
            for (Object o : queryResult) {
                numResults += this.processResultTuple(o, resultsFromDb, cs2GeneIdMap);
            }

            // Periodic progress report; the timer is restarted so this fires at most about every 5 seconds.
            if (timer.getTime() > 5000 && AbstractDao.log.isInfoEnabled()) {
                AbstractDao.log.info("Batch time: " + timer.getTime() + "ms; Fetched DiffEx " + numResults + " results so far. " + numResultSetBatchesDone + "/" + numResultSetBatches + " resultset batches completed. " + numGeneBatchesDone + "/" + numGeneBatches + " gene batches done.");
                timer.reset();
                timer.start();
            }

            // Check if task was cancelled.
            if (Thread.currentThread().isInterrupted()) {
                throw new TaskCancelledException("Search was cancelled");
            }

            numGeneBatchesDone++;

            if (DifferentialExpressionResultDaoImpl.CORRECTED_PVALUE_THRESHOLD_TO_BE_CONSIDERED_DIFF_EX < 1.0) {
                timeForFillingNonSig += this.fillNonSignificant(pbL, resultSetIdsMap, resultsFromDb, resultSetIdBatch, cs2GeneIdMap, session);
            }
        }

        // Check if task was cancelled.
        if (Thread.currentThread().isInterrupted()) {
            throw new TaskCancelledException("Search was cancelled");
        }

        numResultSetBatchesDone++;
    }

    // NOTE(review): 'timer' may have been reset by the progress report above, so the value logged here is the
    // time since the last reset rather than a true total - confirm whether that is intended.
    if (timer.getTime() > 1000 && AbstractDao.log.isInfoEnabled()) {
        AbstractDao.log.info("Fetching DiffEx from DB took total of " + timer.getTime() + " ms : geneIds=" + StringUtils.abbreviate(StringUtils.join(geneIds, ","), 50) + " result set=" + StringUtils.abbreviate(StringUtils.join(resultSetsNeeded, ","), 50));
        if (timeForFillingNonSig > 100) {
            AbstractDao.log.info("Filling in non-significant values: " + timeForFillingNonSig + "ms in total");
        }
    }

    // Add the DB results to the cached results.
    this.addToCache(resultsFromDb, resultSetsNeeded, geneIds);

    // Merge what came from the database into what came from the cache.
    for (Map.Entry<Long, Map<Long, DiffExprGeneSearchResult>> fromDb : resultsFromDb.entrySet()) {
        Long resultSetId = fromDb.getKey();
        Map<Long, DiffExprGeneSearchResult> geneResults = fromDb.getValue();
        if (results.containsKey(resultSetId)) {
            results.get(resultSetId).putAll(geneResults);
        } else {
            results.put(resultSetId, geneResults);
        }
    }

    return results;
}
Aggregations