use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.
the class DesignElementDataVectorDaoImpl method getVectorsForProbesInExperiments.
/**
 * Looks up processed expression data vectors of one experiment for a set of probes. The probe ids (the keys of
 * {@code cs2gene}) are split into batches of 100; each batch is handed to {@code getVectorsBatch} together with
 * the prepared query and the result map.
 *
 * @param ee      id of the experiment; bound to the query and matched against {@code dedv.expressionExperiment.id}
 * @param cs2gene Map of probes to genes.
 * @return map of vectors to gene ids.
 */
Map<T, Collection<Long>> getVectorsForProbesInExperiments(Long ee, Map<Long, Collection<Long>> cs2gene) {
    // The experiment is matched with '=' and not with an in clause, as an in clause can't use the indices.
    // language=HQL
    String hql = "select dedv, dedv.designElement.id from ProcessedExpressionDataVector dedv fetch all properties"
            + " where dedv.designElement.id in ( :cs ) and dedv.expressionExperiment.id = :eeId ";

    Session currentSession = this.getSessionFactory().getCurrentSession();
    org.hibernate.Query vectorQuery = currentSession.createQuery(hql);
    vectorQuery.setReadOnly(true);
    vectorQuery.setFlushMode(FlushMode.MANUAL);

    Map<T, Collection<Long>> vectorsToGenes = new HashMap<>();

    StopWatch timer = new StopWatch();
    timer.start();

    // The experiment restriction is bound once and reused for every batch of probe ids.
    vectorQuery.setLong("eeId", ee);

    final int probesPerBatch = 100;
    BatchIterator<Long> probeBatches = new BatchIterator<>(cs2gene.keySet(), probesPerBatch);
    while (probeBatches.hasNext()) {
        this.getVectorsBatch(cs2gene, vectorQuery, vectorsToGenes, probeBatches.next());
    }

    // Only report slow fetches: more than 200 ms, or more than 20 ms per vector retrieved, whichever is larger.
    int slowThresholdMs = Math.max(200, 20 * vectorsToGenes.size());
    if (timer.getTime() > slowThresholdMs) {
        AbstractDao.log.info("Fetched " + vectorsToGenes.size() + " vectors for " + cs2gene.size() + " probes in "
                + timer.getTime() + "ms\n" + "Vector query was: " + hql);
    }
    return vectorsToGenes;
}
use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.
the class DesignElementDataVectorDaoImpl method getVectorsForProbesInExperiments.
/**
 * Looks up processed expression data vectors for a set of probes, without restricting the experiment. The probe
 * ids (the keys of {@code cs2gene}) are split into batches of 100; each batch is handed to
 * {@code getVectorsBatch} together with the prepared query and the result map.
 *
 * @param cs2gene Map of probes to genes.
 * @return map of vectors to gene ids.
 */
Map<T, Collection<Long>> getVectorsForProbesInExperiments(Map<Long, Collection<Long>> cs2gene) {
    // language=HQL
    String hql = "select dedv, dedv.designElement.id from ProcessedExpressionDataVector dedv fetch all properties"
            + " where dedv.designElement.id in ( :cs ) ";

    Session currentSession = this.getSessionFactory().getCurrentSession();
    org.hibernate.Query vectorQuery = currentSession.createQuery(hql);
    vectorQuery.setReadOnly(true);
    vectorQuery.setFlushMode(FlushMode.MANUAL);

    Map<T, Collection<Long>> vectorsToGenes = new HashMap<>();

    StopWatch timer = new StopWatch();
    timer.start();

    final int probesPerBatch = 100;
    BatchIterator<Long> probeBatches = new BatchIterator<>(cs2gene.keySet(), probesPerBatch);
    while (probeBatches.hasNext()) {
        this.getVectorsBatch(cs2gene, vectorQuery, vectorsToGenes, probeBatches.next());
    }

    // Only report slow fetches: more than 200 ms, or more than 20 ms per vector retrieved, whichever is larger.
    int slowThresholdMs = Math.max(200, 20 * vectorsToGenes.size());
    if (timer.getTime() > slowThresholdMs) {
        AbstractDao.log.info("Fetched " + vectorsToGenes.size() + " vectors for " + cs2gene.size() + " probes in "
                + timer.getTime() + "ms\n" + "Vector query was: " + hql);
    }
    return vectorsToGenes;
}
use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.
the class GeneDaoImpl method loadThawed.
/**
 * Loads the genes with the given ids, working through the ids in batches of {@code GeneDaoImpl.BATCH_SIZE} and
 * delegating each batch to {@code doLoadThawedLite}.
 *
 * @param ids ids of the genes to load
 * @return the genes collected from all batches (duplicates collapse, since a set is used); an empty collection
 *         when {@code ids} is empty
 */
@Override
public Collection<Gene> loadThawed(Collection<Long> ids) {
    Collection<Gene> thawedGenes = new HashSet<>();

    if (!ids.isEmpty()) {
        StopWatch timer = new StopWatch();
        timer.start();

        BatchIterator<Long> idBatches = new BatchIterator<>(ids, GeneDaoImpl.BATCH_SIZE);
        while (idBatches.hasNext()) {
            thawedGenes.addAll(this.doLoadThawedLite(idBatches.next()));
        }

        // Timing is only worth a log line when the whole load took more than a second.
        if (timer.getTime() > 1000) {
            AbstractDao.log.debug(
                    "Load+thawRawAndProcessed " + thawedGenes.size() + " genes: " + timer.getTime() + "ms");
        }
    }

    return thawedGenes;
}
use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.
the class GeneDaoImpl method loadThawedLiter.
/**
 * Loads the genes with the given ids, working through the ids in batches of {@code GeneDaoImpl.BATCH_SIZE} and
 * delegating each batch to {@code doLoadThawedLiter}.
 *
 * @param ids ids of the genes to load
 * @return the genes collected from all batches (duplicates collapse, since a set is used); an empty collection
 *         when {@code ids} is empty
 */
@Override
public Collection<Gene> loadThawedLiter(Collection<Long> ids) {
    Collection<Gene> thawedGenes = new HashSet<>();

    if (!ids.isEmpty()) {
        StopWatch timer = new StopWatch();
        timer.start();

        BatchIterator<Long> idBatches = new BatchIterator<>(ids, GeneDaoImpl.BATCH_SIZE);
        while (idBatches.hasNext()) {
            thawedGenes.addAll(this.doLoadThawedLiter(idBatches.next()));
        }

        // Timing is only worth a log line when the whole load took more than a second.
        if (timer.getTime() > 1000) {
            AbstractDao.log.debug(
                    "Load+thawRawAndProcessed " + thawedGenes.size() + " genes: " + timer.getTime() + "ms");
        }
    }

    return thawedGenes;
}
use of ubic.basecode.util.BatchIterator in project Gemma by PavlidisLab.
the class CoexpressionDaoImpl method updatedTestedIn.
/**
 * Mark the genes as being tested for coexpression in the data set and persist the information in the database. This
 * is run at the tail end of coexpression analysis for the data set.
 * <p>
 * Works in two passes: first, existing {@code GeneCoexpressionTestedIn} rows for the genes are fetched (512 gene
 * ids per query) and updated with the data set's id; second, a new row is saved for every gene that had no
 * existing row. In both passes the session is flushed and cleared after every 256 entities.
 *
 * @param ee          the data set
 * @param genesTested the genes
 */
private void updatedTestedIn(BioAssaySet ee, Collection<Gene> genesTested) {
    Session session = this.getSessionFactory().getCurrentSession();
    Query existingQuery = session.createQuery("from GeneCoexpressionTestedIn where geneId in (:ids)");

    Set<Long> genesWithExistingRow = new HashSet<>();
    Collection<Long> testedGeneIds = EntityUtils.getIds(genesTested);

    // Pass 1: update the rows that already exist.
    for (Collection<Long> idBatch : new BatchIterator<>(testedGeneIds, 512)) {
        existingQuery.setParameterList("ids", idBatch);
        List<GeneCoexpressionTestedIn> existingRows = existingQuery.list();

        // The counter restarts for each batch of ids.
        int processedInBatch = 0;
        for (GeneCoexpressionTestedIn row : existingRows) {
            row.addEntity(ee.getId());
            session.update(row);
            assert row.isIncluded(ee.getId());
            genesWithExistingRow.add(row.getGeneId());

            processedInBatch++;
            if (processedInBatch % 256 == 0) {
                session.flush();
                session.clear();
            }
        }
    }

    if (!genesWithExistingRow.isEmpty()) {
        CoexpressionDaoImpl.log
                .info("Updated tested-in information for " + genesWithExistingRow.size() + " genes");
        // TODO do it just for the genes changed.
        this.geneTestedInCache.clearCache();
    }

    session.flush();
    session.clear();

    // Pass 2: whatever ids remain belong to genes which don't have an entry at all.
    testedGeneIds.removeAll(genesWithExistingRow);
    if (testedGeneIds.isEmpty()) {
        return;
    }

    CoexpressionDaoImpl.log.info("Adding tested-in information for " + testedGeneIds.size() + " genes");

    int saved = 0;
    for (Long geneId : testedGeneIds) {
        GeneCoexpressionTestedIn newRow = new GeneCoexpressionTestedIn(geneId);
        newRow.addEntity(ee.getId());
        assert newRow.isIncluded(ee.getId());
        assert newRow.getNumIds() == 1;
        session.save(newRow);

        saved++;
        if (saved % 256 == 0) {
            session.flush();
            session.clear();
        }
    }
}
Aggregations