Use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.
Class DifferentialExpressionResultDaoImpl, method loadContrastDetailsForResults.
/**
 * Key method for getting contrasts associated with results.
 * <p>
 * Runs a native SQL query against CONTRAST_RESULT, one batch of result IDs at a time, and groups the returned
 * contrast rows by the ID of the differential expression analysis result they refer to.
 *
 * @param ids IDs of differential expression analysis results; must not be null
 * @return map of result ID to a value object holding that result's contrasts. Results for which no contrast row
 *         was returned have no entry; the map is empty when {@code ids} is empty.
 */
@Override
public Map<Long, ContrastsValueObject> loadContrastDetailsForResults(Collection<Long> ids) {
    // language=SQL
    final String sql = "SELECT DISTINCT c.ID, c.LOG_FOLD_CHANGE, c.FACTOR_VALUE_FK,"
            + " c.DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT_FK, c.PVALUE FROM CONTRAST_RESULT c"
            + " WHERE c.DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT_FK IN (:ids) ";

    Map<Long, ContrastsValueObject> contrastsByResultId = new HashMap<>();
    if (ids.isEmpty()) {
        return contrastsByResultId;
    }

    SQLQuery query = this.getSessionFactory().getCurrentSession().createSQLQuery(sql);

    // previously: 500, then 1000. New optimized query is plenty fast.
    final int batchSize = 2000;

    StopWatch timer = new StopWatch();
    for (Collection<Long> batch : new BatchIterator<>(ids, batchSize)) {
        // time each batch separately
        timer.reset();
        timer.start();

        query.setParameterList("ids", batch);
        for (Object row : query.list()) {
            // column order follows the SELECT: 0=ID, 1=LOG_FOLD_CHANGE, 2=FACTOR_VALUE_FK, 3=result FK, 4=PVALUE
            Object[] columns = (Object[]) row;

            Long resultId = ((BigInteger) columns[3]).longValue();
            ContrastsValueObject contrasts = contrastsByResultId.get(resultId);
            if (contrasts == null) {
                contrasts = new ContrastsValueObject(resultId);
                contrastsByResultId.put(resultId, contrasts);
            }

            Long contrastId = ((BigInteger) columns[0]).longValue();
            // a cast of null stays null, so the nullable numeric columns need no explicit check
            Double logFoldChange = (Double) columns[1];
            Long factorValueId = null;
            if (columns[2] != null) {
                factorValueId = ((BigInteger) columns[2]).longValue();
            }
            Double pvalue = (Double) columns[4];

            contrasts.addContrast(contrastId, factorValueId, logFoldChange, pvalue, null);
        }

        // only report batches that took more than two seconds
        if (timer.getTime() > 2000) {
            AbstractDao.log.info("Fetch " + batch.size() + " results with contrasts: " + timer.getTime()
                    + "ms; query was\n " + sql.replace(":ids", StringUtils.join(batch, ",")));
        }
    }
    return contrastsByResultId;
}
Use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.
Class CoexpressionDaoImpl, method getCoexpressionFromCacheOrDbViaGenes.
/**
 * Fetch coexpression data for one or more genes, without a constraint on data sets, but with other parameters
 * possible. It checks the cache (only when the stringency is at least
 * {@code CoexpressionCache.CACHE_QUERY_STRINGENCY}), then the database. Results retrieved from the database are
 * immediately cached when the stringency is at most {@code CoexpressionCache.CACHE_QUERY_STRINGENCY} and
 * {@code quick} is false.
 *
 * @param t          taxon; must not be null (checked by assertion only)
 * @param genes      IDs, assumed to be all from the same taxon
 * @param stringency minimum level of support required
 * @param quick      if true, the database fetch is called with its last argument set to false and nothing is
 *                   cached; presumably this skips filling in which data sets were supporting and how many were
 *                   tested -- confirm against getCoexpressionFromDbViaGenes2
 * @return map of gene ids to ranked list of coexpression value objects, which will still need to be trimmed.
 */
private Map<Long, List<CoexpressionValueObject>> getCoexpressionFromCacheOrDbViaGenes(Taxon t, Collection<Long> genes, int stringency, boolean quick) {
    Map<Long, List<CoexpressionValueObject>> finalResult = new HashMap<>();

    /*
     * First, check the cache -- if the stringency is >= limit
     */
    Collection<Long> genesNeeded = new HashSet<>(genes);
    if (stringency >= CoexpressionCache.CACHE_QUERY_STRINGENCY) {
        genesNeeded = this.checkCache(genes, finalResult);
        if (genesNeeded.isEmpty()) {
            return finalResult;
        }
    }

    // we assume the genes are from the same taxon.
    assert t != null;

    // fetch rest of genes needed from the database.
    StopWatch timer = new StopWatch();
    timer.start();

    // how many genes to get at once.
    final int chunkSize = 64;
    int genesQueried = 0;
    int total = 0;

    for (Collection<Long> batch : new BatchIterator<>(genesNeeded, chunkSize)) {
        StopWatch innertimer = new StopWatch();
        innertimer.start();

        Map<Long, List<CoexpressionValueObject>> rr = this.getCoexpressionFromDbViaGenes2(batch, t, stringency, !quick);

        // we should not cache unless everything is populated
        if (!rr.isEmpty() && stringency <= CoexpressionCache.CACHE_QUERY_STRINGENCY && !quick) {
            gene2GeneCoexpressionCache.cacheCoexpression(rr);
        }

        for (Map.Entry<Long, List<CoexpressionValueObject>> entry : rr.entrySet()) {
            // could replace with a single putAll but want this assertion for now.
            assert !finalResult.containsKey(entry.getKey());
            finalResult.put(entry.getKey(), entry.getValue());
            total += entry.getValue().size();
        }

        // Count this batch before reporting: 'total' already includes its results, so the gene count must too.
        // Previously the counter was bumped after the log call, so the figure lagged by one batch and a slow
        // first batch was never reported.
        genesQueried += batch.size();

        // both counts are cumulative; the elapsed time is for this batch only
        if (innertimer.getTime() > 1000 && genesQueried > 0) {
            CoexpressionDaoImpl.log.debug("Fetched " + total + " coexpression results from db for " + genesQueried + "/" + genesNeeded.size() + " genes needed in " + innertimer.getTime() + "ms");
        }
    }

    if (timer.getTime() > 10000) {
        // this raw count is not really relevant - it has to be filtered later.
        CoexpressionDaoImpl.log.debug("Fetched " + total + " coexpression results from db or cache for " + genes.size() + " genes in " + timer.getTime() + "ms");
    }

    return finalResult;
}
Use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.
Class CharacteristicDaoImpl, method getParents.
/**
 * Look up parents of the given characteristics for one parent class, processing the characteristics in batches
 * of {@code CharacteristicDaoImpl.BATCH_SIZE}.
 *
 * @param parentClass     class of parent to look for; used for the lookup and in log messages
 * @param characteristics characteristics to find parents for; may be null or empty
 * @return the map handed to {@code batchGetParents} for each batch (presumably filled there with characteristic
 *         to parent entries -- confirm against that method); empty when no characteristics were given
 */
@Override
public Map<Characteristic, Object> getParents(Class<?> parentClass, Collection<Characteristic> characteristics) {
    Map<Characteristic, Object> parentsByCharacteristic = new HashMap<>();
    if (characteristics == null || characteristics.isEmpty()) {
        return parentsByCharacteristic;
    }

    // Debug aid only: list the distinct non-null value URIs of the vocabulary characteristics being queried.
    if (AbstractDao.log.isDebugEnabled()) {
        Collection<String> valueUris = new HashSet<>();
        for (Characteristic characteristic : characteristics) {
            if (!(characteristic instanceof VocabCharacteristic)) {
                continue;
            }
            String valueUri = ((VocabCharacteristic) characteristic).getValueUri();
            if (valueUri != null) {
                valueUris.add(valueUri);
            }
        }
        AbstractDao.log.debug("For class=" + parentClass.getSimpleName() + ": " + characteristics.size()
                + " Characteristics have URIS:\n" + StringUtils.join(valueUris, "\n"));
    }

    StopWatch timer = new StopWatch();
    timer.start();

    for (Collection<Characteristic> batch : new BatchIterator<>(characteristics, CharacteristicDaoImpl.BATCH_SIZE)) {
        this.batchGetParents(parentClass, batch, parentsByCharacteristic);
    }

    // only report when the whole lookup took more than a second
    if (timer.getTime() > 1000) {
        AbstractDao.log.info("Fetch parents of characteristics: " + timer.getTime() + "ms for "
                + characteristics.size() + " elements for class=" + parentClass.getSimpleName());
    }

    return parentsByCharacteristic;
}
Aggregations