use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.
the class GeneDaoImpl method findByOfficialSymbols.
/**
 * Looks up genes by official symbol within one taxon. The symbols are queried in batches of
 * {@code GeneDaoImpl.BATCH_SIZE} so the {@code in (...)} list of a single query stays bounded.
 *
 * @param query   official symbols to search for
 * @param taxonId id of the taxon the matching genes must belong to
 * @return map from the lower-cased official symbol of each gene found to that gene; when several
 *         genes lower-case to the same symbol, the one encountered last replaces the earlier ones
 */
@Override
public Map<String, Gene> findByOfficialSymbols(Collection<String> query, Long taxonId) {
    // language=HQL
    final String queryString = "select g from Gene as g join fetch g.taxon t where g.officialSymbol in (:symbols) and t.id = :taxonId";
    Map<String, Gene> genesBySymbol = new HashMap<>();
    for (Collection<String> symbolBatch : new BatchIterator<>(query, GeneDaoImpl.BATCH_SIZE)) {
        String[] paramNames = new String[] { "symbols", "taxonId" };
        Object[] paramValues = new Object[] { symbolBatch, taxonId };
        // noinspection unchecked
        List<Gene> found = this.getHibernateTemplate().findByNamedParam(queryString, paramNames, paramValues);
        for (Gene gene : found) {
            // Keys are lower-cased, so callers need to look symbols up in lower case.
            String key = gene.getOfficialSymbol().toLowerCase();
            genesBySymbol.put(key, gene);
        }
    }
    return genesBySymbol;
}
use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.
the class GeneDaoImpl method findByNcbiIds.
/**
 * Fetches genes by their NCBI gene id. The ids are queried in batches of
 * {@code GeneDaoImpl.BATCH_SIZE}.
 *
 * @param ncbiIds NCBI gene ids to search for
 * @return map from NCBI gene id to the gene found for it; ids without a match have no entry
 */
@Override
public Map<Integer, Gene> findByNcbiIds(Collection<Integer> ncbiIds) {
    // language=HQL
    final String queryString = "from Gene g where g.ncbiGeneId in (:ncbi)";
    Map<Integer, Gene> genesByNcbiId = new HashMap<>();
    for (Collection<Integer> idBatch : new BatchIterator<>(ncbiIds, GeneDaoImpl.BATCH_SIZE)) {
        // noinspection unchecked
        List<Gene> found = this.getHibernateTemplate().findByNamedParam(queryString, "ncbi", idBatch);
        for (Gene gene : found) {
            Integer ncbiId = gene.getNcbiGeneId();
            genesByNcbiId.put(ncbiId, gene);
        }
    }
    return genesByNcbiId;
}
use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.
the class ProcessedExpressionDataVectorDaoImpl method getRanks.
/**
 * Collects expression ranks of the given genes in the given experiments.
 * <p>
 * The genes are first mapped to composite sequences on the array designs used by the experiments;
 * the processed vectors for those composite sequences are then queried in batches of 500, and each
 * row's rank values are handed to {@code addToGene} together with the genes mapped to that probe.
 *
 * @param expressionExperiments experiments to restrict the search to
 * @param genes                 genes whose ranks are wanted
 * @param method                rank method, passed through to {@code addToGene}
 * @return map from experiment to a per-gene collection of rank values; empty when no composite
 *         sequences were found for the genes
 */
@Override
public Map<ExpressionExperiment, Map<Gene, Collection<Double>>> getRanks(Collection<ExpressionExperiment> expressionExperiments, Collection<Gene> genes, RankMethod method) {
    Collection<ArrayDesign> arrayDesigns = CommonQueries.getArrayDesignsUsed(EntityUtils.getIds(expressionExperiments), this.getSessionFactory().getCurrentSession()).keySet();
    // this could be further improved by getting probes specific to experiments in batches.
    Map<CompositeSequence, Collection<Gene>> cs2gene = CommonQueries.getCs2GeneMap(genes, arrayDesigns, this.getSessionFactory().getCurrentSession());
    if (cs2gene.isEmpty()) {
        AbstractDao.log.warn("No composite sequences found for genes");
        return new HashMap<>();
    }
    // The query text does not depend on the batch, so it is built once.
    // language=HQL
    final String queryString = "select distinct dedv.expressionExperiment, dedv.designElement, dedv.rankByMean, " + "dedv.rankByMax from ProcessedExpressionDataVector dedv " + " where dedv.designElement in ( :cs ) and dedv.expressionExperiment in (:ees) ";
    Map<ExpressionExperiment, Map<Gene, Collection<Double>>> result = new HashMap<>();
    for (Collection<CompositeSequence> batch : new BatchIterator<>(cs2gene.keySet(), 500)) {
        // Both named parameters feed an "in (...)" clause and carry collections, so both must be
        // bound with setParameterList; setParameter would bind the collection as a single value.
        List<?> rows = this.getSessionFactory().getCurrentSession().createQuery(queryString)
                .setParameterList("cs", batch)
                .setParameterList("ees", expressionExperiments)
                .list();
        for (Object row : rows) {
            // Tuple layout follows the select clause: experiment, design element, rankByMean, rankByMax.
            Object[] tuple = (Object[]) row;
            ExpressionExperiment e = (ExpressionExperiment) tuple[0];
            CompositeSequence d = (CompositeSequence) tuple[1];
            // Missing ranks are represented as NaN.
            Double rMean = tuple[2] == null ? Double.NaN : (Double) tuple[2];
            Double rMax = tuple[3] == null ? Double.NaN : (Double) tuple[3];
            Map<Gene, Collection<Double>> rMap = result.get(e);
            if (rMap == null) {
                rMap = new HashMap<>();
                result.put(e, rMap);
            }
            Collection<Gene> genes4probe = cs2gene.get(d);
            this.addToGene(method, rMap, rMean, rMax, genes4probe);
        }
    }
    return result;
}
use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.
the class DifferentialExpressionResultDaoImpl method load.
/**
 * Loads differential expression analysis results by id, querying in batches of 1000 ids. A batch
 * that takes longer than one second is reported at info level together with the query string.
 *
 * @param ids ids of the results to load
 * @return the results found, as a set; empty when {@code ids} is empty
 */
@Override
public Collection<DifferentialExpressionAnalysisResult> load(Collection<Long> ids) {
    Collection<DifferentialExpressionAnalysisResult> loaded = new HashSet<>();
    if (ids.isEmpty()) {
        return loaded;
    }
    // language=HQL
    final String queryString = "from DifferentialExpressionAnalysisResultImpl dea where dea.id in (:ids)";
    // previously: 500.
    final int batchSize = 1000;
    for (Collection<Long> idBatch : new BatchIterator<>(ids, batchSize)) {
        StopWatch timer = new StopWatch();
        timer.start();
        // noinspection unchecked
        loaded.addAll(this.getSessionFactory().getCurrentSession()
                .createQuery(queryString)
                .setParameterList("ids", idBatch)
                .list());
        if (timer.getTime() <= 1000) {
            // Fast enough; nothing to report for this batch.
            continue;
        }
        AbstractDao.log.info("Fetch " + idBatch.size() + "/" + ids.size() + " results with contrasts: " + timer.getTime() + "ms; query was\n " + queryString);
    }
    return loaded;
}
use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.
the class DifferentialExpressionResultDaoImpl method findDiffExAnalysisResultIdsInResultSets.
/**
 * Finds differential expression results for the given genes in the given result sets, using the
 * cache where possible and querying the database in batches for the remainder.
 * <p>
 * Outline: (1) fill {@code results} from the cache; (2) determine which result sets still need a
 * database query; (3) for each batch of result sets, resolve the probes for the genes and run the
 * native query once per batch of probes; (4) add what was fetched to the cache and merge it into
 * the returned map.
 *
 * @param resultSets summaries of the result sets to search; they are indexed by {@code getResultSetId}
 * @param geneIds    ids of the genes of interest
 * @return map keyed by result set id; the inner map is keyed by a {@code Long} (presumably the gene
 *         id; verify against {@code processResultTuple})
 * @throws TaskCancelledException if the current thread is found to be interrupted between batches
 */
@Override
public Map<Long, Map<Long, DiffExprGeneSearchResult>> findDiffExAnalysisResultIdsInResultSets(Collection<DiffExResultSetSummaryValueObject> resultSets, Collection<Long> geneIds) {
    Map<Long, Map<Long, DiffExprGeneSearchResult>> results = new HashMap<>();
    Session session = this.getSessionFactory().getCurrentSession();
    Map<Long, DiffExResultSetSummaryValueObject> resultSetIdsMap = EntityUtils.getIdMap(resultSets, "getResultSetId");
    Map<Long, Collection<Long>> foundInCache = this.fillFromCache(results, resultSetIdsMap.keySet(), geneIds);
    if (!foundInCache.isEmpty()) {
        AbstractDao.log.info("Results for " + foundInCache.size() + " resultsets found in cache");
    } else {
        AbstractDao.log.info("No results were in the cache");
    }
    Collection<Long> resultSetsNeeded = this.stripUnneededResultSets(foundInCache, resultSetIdsMap.keySet(), geneIds);
    // Are we finished?
    if (resultSetsNeeded.isEmpty()) {
        AbstractDao.log.info("All results were in the cache.");
        return results;
    }
    AbstractDao.log.info(foundInCache.size() + "/" + resultSetIdsMap.size() + " resultsSets had at least some cached results; still need to query " + resultSetsNeeded.size());
    assert !resultSetsNeeded.isEmpty();
    org.hibernate.SQLQuery queryObject = session.createSQLQuery(DifferentialExpressionResultDaoImpl.fetchBatchDifferentialExpressionAnalysisResultsByResultSetsAndGeneQuery);
    /*
     * These values have been tweaked to probe for performance issues.
     */
    int resultSetBatchSize = 50;
    int geneBatchSize = 100;
    // Enlarge the batch size along whichever dimension has more elements.
    if (resultSetsNeeded.size() > geneIds.size()) {
        resultSetBatchSize = Math.min(500, resultSetsNeeded.size());
        AbstractDao.log.info("Batching by result sets (" + resultSetsNeeded.size() + " resultSets); " + geneIds.size() + " genes; batch size=" + resultSetBatchSize);
    } else {
        geneBatchSize = Math.min(200, geneIds.size());
        AbstractDao.log.info("Batching by genes (" + geneIds.size() + " genes); " + resultSetsNeeded.size() + " resultSets; batch size=" + geneBatchSize);
    }
    // Divide as double: int / int truncates before Math.ceil sees the value, which would
    // under-report the number of batches whenever the last batch is partial.
    final int numResultSetBatches = (int) Math.ceil((double) resultSetsNeeded.size() / resultSetBatchSize);
    queryObject.setFlushMode(FlushMode.MANUAL);
    StopWatch timer = new StopWatch();
    timer.start();
    int numResults = 0;
    long timeForFillingNonSig = 0;
    Map<Long, Map<Long, DiffExprGeneSearchResult>> resultsFromDb = new HashMap<>();
    int numResultSetBatchesDone = 0;
    // Iterate over batches of resultSets
    for (Collection<Long> resultSetIdBatch : new BatchIterator<>(resultSetsNeeded, resultSetBatchSize)) {
        if (AbstractDao.log.isDebugEnabled()) {
            AbstractDao.log.debug("Starting batch of resultsets: " + StringUtils.abbreviate(StringUtils.join(resultSetIdBatch, ","), 100));
        }
        /*
         * Get the probes using the CommonQueries gene2cs. Otherwise we (in effect) end up doing this over and over
         * again.
         */
        Map<Long, Collection<Long>> cs2GeneIdMap = this.getProbesForGenesInResultSetBatch(session, geneIds, resultSetIdsMap, resultSetIdBatch);
        queryObject.setParameterList("rs_ids", resultSetIdBatch);
        int numGeneBatchesDone = 0;
        // Same as above: floating-point division so that a partial final batch is counted.
        final int numGeneBatches = (int) Math.ceil((double) cs2GeneIdMap.size() / geneBatchSize);
        StopWatch innerQt = new StopWatch();
        // iterate over batches of probes (genes)
        for (Collection<Long> probeBatch : new BatchIterator<>(cs2GeneIdMap.keySet(), geneBatchSize)) {
            if (AbstractDao.log.isDebugEnabled()) {
                AbstractDao.log.debug("Starting batch of probes: " + StringUtils.abbreviate(StringUtils.join(probeBatch, ","), 100));
            }
            // would it help to sort the probeBatch/
            List<Long> pbL = new Vector<>(probeBatch);
            Collections.sort(pbL);
            queryObject.setParameterList("probe_ids", pbL);
            innerQt.start();
            List<?> queryResult = queryObject.list();
            innerQt.stop();
            if (innerQt.getTime() > 2000) {
                // show the actual query with params.
                AbstractDao.log.info("Query time: " + innerQt.getTime() + "ms:\n " + queryObject.getQueryString().replace(":probe_ids", StringUtils.join(probeBatch, ",")).replace(":rs_ids", StringUtils.join(resultSetIdBatch, ",")));
            }
            // The inner stopwatch is reused for the next probe batch.
            innerQt.reset();
            /*
             * Each query tuple are the probe, result, resultsSet, qvalue, pvalue.
             */
            for (Object o : queryResult) {
                numResults += this.processResultTuple(o, resultsFromDb, cs2GeneIdMap);
            }
            // Periodic progress report; the timer is restarted so the next report covers a fresh window.
            if (timer.getTime() > 5000 && AbstractDao.log.isInfoEnabled()) {
                AbstractDao.log.info("Batch time: " + timer.getTime() + "ms; Fetched DiffEx " + numResults + " results so far. " + numResultSetBatchesDone + "/" + numResultSetBatches + " resultset batches completed. " + numGeneBatchesDone + "/" + numGeneBatches + " gene batches done.");
                timer.reset();
                timer.start();
            }
            // Check if task was cancelled.
            if (Thread.currentThread().isInterrupted()) {
                throw new TaskCancelledException("Search was cancelled");
            }
            numGeneBatchesDone++;
            if (DifferentialExpressionResultDaoImpl.CORRECTED_PVALUE_THRESHOLD_TO_BE_CONSIDERED_DIFF_EX < 1.0) {
                timeForFillingNonSig += this.fillNonSignificant(pbL, resultSetIdsMap, resultsFromDb, resultSetIdBatch, cs2GeneIdMap, session);
            }
        }
        // Check if task was cancelled.
        if (Thread.currentThread().isInterrupted()) {
            throw new TaskCancelledException("Search was cancelled");
        }
        numResultSetBatchesDone++;
    }
    if (timer.getTime() > 1000 && AbstractDao.log.isInfoEnabled()) {
        AbstractDao.log.info("Fetching DiffEx from DB took total of " + timer.getTime() + " ms : geneIds=" + StringUtils.abbreviate(StringUtils.join(geneIds, ","), 50) + " result set=" + StringUtils.abbreviate(StringUtils.join(resultSetsNeeded, ","), 50));
        if (timeForFillingNonSig > 100) {
            AbstractDao.log.info("Filling in non-significant values: " + timeForFillingNonSig + "ms in total");
        }
    }
    // Add the DB results to the cached results.
    this.addToCache(resultsFromDb, resultSetsNeeded, geneIds);
    // Merge what came from the database into what was already taken from the cache.
    for (Map.Entry<Long, Map<Long, DiffExprGeneSearchResult>> fetched : resultsFromDb.entrySet()) {
        Map<Long, DiffExprGeneSearchResult> existing = results.get(fetched.getKey());
        if (existing != null) {
            existing.putAll(fetched.getValue());
        } else {
            results.put(fetched.getKey(), fetched.getValue());
        }
    }
    return results;
}
Aggregations