use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class BioSequenceDaoImpl method find.
/**
 * Looks up the persistent BioSequence that corresponds to the given one, using the query built by
 * {@link BusinessKey#createQueryObject}.
 * <p>
 * When the query yields exactly one row, that row is returned. When it yields several, only a row that is
 * {@code equals()} to the argument is accepted; if more than one such row exists a warning is logged and the
 * first one encountered is returned.
 *
 * @param bioSequence the sequence to look for; validated with {@link BusinessKey#checkValidKey}
 * @return the matching sequence, or {@code null} when the query returns nothing or when none of several
 *         candidates equals the argument
 */
@SuppressWarnings("unchecked")
@Override
public BioSequence find(BioSequence bioSequence) {
    BusinessKey.checkValidKey(bioSequence);

    Criteria criteria = BusinessKey.createQueryObject(this.getSessionFactory().getCurrentSession(), bioSequence);
    criteria.setReadOnly(true);
    criteria.setFlushMode(FlushMode.MANUAL);

    // The query initially matches on name and taxon only, so it may return several rows.
    java.util.List<?> candidates = criteria.list();

    if (candidates == null || candidates.isEmpty()) {
        return null;
    }

    if (candidates.size() == 1) {
        return (BioSequence) candidates.iterator().next();
    }

    // Several candidates: report them, then pick the one equal to the query.
    // See BusinessKey for more explanation of why this is needed.
    this.debug(bioSequence, candidates);

    BioSequence best = null;
    for (BioSequence candidate : (Collection<BioSequence>) candidates) {
        if (!candidate.equals(bioSequence)) {
            continue;
        }
        if (best != null) {
            AbstractDao.log.warn("More than one sequence in the database matches " + bioSequence + ", returning arbitrary match: " + best);
            break;
        }
        best = candidate;
    }
    return best;
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class BioSequenceDaoImpl method debug.
/**
 * Writes a debug-level report listing the query sequence (if any) and every candidate that matched it.
 * <p>
 * Nothing is done unless debug logging is enabled, so the entity accessors used to build the report are
 * not invoked needlessly.
 *
 * @param query   the sequence that was searched for; may be {@code null}, in which case no query line is written
 * @param results the candidates returned by the search; every element is cast to {@link BioSequence}
 */
private void debug(BioSequence query, List<?> results) {
    // Check the log level first: the message is only ever used for debug output.
    if (!AbstractDao.log.isDebugEnabled()) {
        return;
    }

    StringBuilder sb = new StringBuilder();
    sb.append("\nMultiple BioSequences found matching query:\n");
    if (query != null) {
        appendSequenceSummary(sb, "\tQuery: ID=", query);
    }
    for (Object object : results) {
        appendSequenceSummary(sb, "\tMatch: ID=", (BioSequence) object);
    }
    AbstractDao.log.debug(sb.toString());
}

/**
 * Appends a one-line summary (id, name, abbreviated sequence, accession) of a sequence to the buffer.
 *
 * @param sb       buffer to append to
 * @param prefix   text written before the id, e.g. the line label
 * @param sequence the sequence to summarise
 */
private static void appendSequenceSummary(StringBuilder sb, String prefix, BioSequence sequence) {
    sb.append(prefix).append(sequence.getId()).append(" Name=").append(sequence.getName());
    if (StringUtils.isNotBlank(sequence.getSequence())) {
        // Only the first few characters are shown to keep the line short.
        sb.append(" Sequence=").append(StringUtils.abbreviate(sequence.getSequence(), 10));
    }
    if (sequence.getSequenceDatabaseEntry() != null) {
        sb.append(" acc=").append(sequence.getSequenceDatabaseEntry().getAccession());
    }
    sb.append("\n");
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class DesignElementDataVectorDaoImpl method thawRawAndProcessed.
/**
 * Initializes ("thaws") the given vectors and a selection of their associations using the current session.
 * <p>
 * The work is done in four timed phases: (1) each vector and its quantitation type, (2) the expression
 * experiments seen on the vectors, (3) the bioassay dimensions, which are re-fetched with their bioassays,
 * samples, array designs and factor values and then set back on the vectors, and (4) the biological
 * characteristic and array design of each design element. A timing message is logged at info level for a
 * phase only when the thresholds coded below are exceeded.
 *
 * @param designElementDataVectors the vectors to thaw; if {@code null} the method returns immediately
 */
@Override
public void thawRawAndProcessed(Collection<DesignElementDataVector> designElementDataVectors) {
if (designElementDataVectors == null)
return;
Session session = this.getSessionFactory().getCurrentSession();
Hibernate.initialize(designElementDataVectors);
StopWatch timer = new StopWatch();
timer.start();
// Associations collected in phase 1 and processed in the later phases.
Collection<ExpressionExperiment> ees = new HashSet<>();
Map<BioAssayDimension, Collection<DesignElementDataVector>> dims = new HashMap<>();
Collection<CompositeSequence> cs = new HashSet<>();
// Phase 1: initialize each vector and its quantitation type, record its associations, then evict both.
for (DesignElementDataVector vector : designElementDataVectors) {
// NOTE(review): presumably this reassociates the vector with the session without taking a database
// lock (LockOptions.NONE) -- confirm against the Hibernate documentation.
session.buildLockRequest(LockOptions.NONE).lock(vector);
Hibernate.initialize(vector);
Hibernate.initialize(vector.getQuantitationType());
BioAssayDimension bad = vector.getBioAssayDimension();
// Group the vectors by their bioassay dimension.
if (!dims.containsKey(bad)) {
dims.put(bad, new HashSet<DesignElementDataVector>());
}
dims.get(bad).add(vector);
cs.add(vector.getDesignElement());
ees.add(vector.getExpressionExperiment());
session.evict(vector.getQuantitationType());
session.evict(vector);
}
// Only report when the phase took more than one millisecond per vector on average.
if (timer.getTime() > designElementDataVectors.size()) {
AbstractDao.log.info("Thaw phase 1, " + designElementDataVectors.size() + " vectors initialized in " + timer.getTime() + "ms ");
}
timer.reset();
timer.start();
// Phase 2: lightly thaw the EEs we saw (initialize, then evict from the session).
for (ExpressionExperiment ee : ees) {
Hibernate.initialize(ee);
session.evict(ee);
}
if (timer.getTime() > 200) {
AbstractDao.log.info("Thaw phase 2, " + ees.size() + " vector-associated expression experiments in " + timer.getTime() + "ms ");
}
timer.reset();
timer.start();
// Phase 3: thaw the bioassayDimensions we saw -- usually one, more rarely two.
for (BioAssayDimension bad : dims.keySet()) {
// Re-fetch the dimension by id together with its bioassays, samples, array designs and factor values.
BioAssayDimension tbad = (BioAssayDimension) this.getSessionFactory().getCurrentSession().createQuery("select distinct bad from BioAssayDimension bad join fetch bad.bioAssays ba join fetch ba.sampleUsed " + "bm join fetch ba.arrayDesignUsed left join fetch bm.factorValues fetch all properties where bad.id= :bad ").setParameter("bad", bad.getId()).uniqueResult();
assert tbad != null;
// NOTE(review): this lookup uses the fetched instance as the map key, so it relies on tbad being
// equal (equals/hashCode) to the original key; only evaluated when assertions are enabled.
assert !dims.get(tbad).isEmpty();
// Point every vector that references this dimension (matched by id) at the fully fetched instance.
for (DesignElementDataVector v : designElementDataVectors) {
if (v.getBioAssayDimension().getId().equals(tbad.getId())) {
v.setBioAssayDimension(tbad);
}
}
}
if (timer.getTime() > 1000) {
AbstractDao.log.info("Thaw phase 3, " + dims.size() + " vector-associated bioassaydimensions in " + timer.getTime() + "ms ");
}
timer.reset();
timer.start();
// Phase 4: thaw the design elements we saw. SLOW
long lastTime = 0;
int count = 0;
for (CompositeSequence de : cs) {
BioSequence seq = de.getBiologicalCharacteristic();
// Design elements without a sequence are skipped entirely (their array design is not initialized).
if (seq == null)
continue;
session.buildLockRequest(LockOptions.NONE).lock(seq);
Hibernate.initialize(seq);
// is this really necessary?
ArrayDesign arrayDesign = de.getArrayDesign();
Hibernate.initialize(arrayDesign);
// Progress checkpoint every 10000 probes; logged only if more than a second passed since the last one.
if (++count % 10000 == 0) {
if (timer.getTime() - lastTime > 1000) {
AbstractDao.log.info("Thawed " + count + " vector-associated probes " + timer.getTime() + " ms");
}
lastTime = timer.getTime();
}
}
timer.stop();
if (designElementDataVectors.size() >= 2000 || timer.getTime() > 200) {
AbstractDao.log.info("Thaw phase 4 " + cs.size() + " vector-associated probes thawed in " + timer.getTime() + "ms");
}
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class ArrayDesignRepeatScanCli method processArrayDesign.
/**
 * Runs the repeat scan for one array design: thaws it, collects its sequences, obtains the altered
 * sequences either from a RepeatMasker output file (when {@code inputFileName} is set) or from a fresh
 * scan, saves them and records an audit note on the design.
 *
 * @param design the array design to process
 */
private void processArrayDesign(ArrayDesign design) {
    ArrayDesign thawed = this.thaw(design);

    // no taxon is passed to this method so all sequences will be retrieved even for multi taxon arrays
    Collection<BioSequence> sequences = ArrayDesignSequenceAlignmentServiceImpl.getSequences(thawed);

    boolean fromFile = this.inputFileName != null;
    RepeatScan scanner = new RepeatScan();

    Collection<BioSequence> altered;
    if (fromFile) {
        altered = scanner.processRepeatMaskerOutput(sequences, inputFileName);
    } else {
        altered = scanner.repeatScan(sequences);
    }

    AbstractCLI.log.info("Saving...");
    bsService.update(altered);

    // The audit note records where the repeat data came from and how many sequences were touched.
    String note = fromFile
            ? "Repeat scan data from file: " + inputFileName + ", updated " + altered.size() + " sequences."
            : "Repeat scan done, updated " + altered.size() + " sequences.";
    this.audit(thawed, note);

    AbstractCLI.log.info("Done with " + thawed);
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class ArrayDesignProbeMapperServiceImpl method processCompositeSequence.
/**
 * Maps a single composite sequence using the BLAT results stored for its biological characteristic.
 *
 * @param config            configuration passed through to the probe mapper
 * @param taxon             if non-null, the sequence's taxon must equal it or nothing is done
 * @param goldenPathDb      analysis database to use; if {@code null} one is created for the sequence's taxon
 * @param compositeSequence the element to process
 * @return the result of {@code probeMapper.processBlatResults}, or {@code null} when the element has no
 *         sequence, when the taxon does not match, or when no BLAT results remain after duplicate removal
 */
@Override
@Transactional
public Map<String, Collection<BlatAssociation>> processCompositeSequence(ProbeMapperConfig config, Taxon taxon, GoldenPathSequenceAnalysis goldenPathDb, CompositeSequence compositeSequence) {
    BioSequence sequence = compositeSequence.getBiologicalCharacteristic();
    if (sequence == null) {
        return null;
    }

    /*
     * It isn't 100% clear what the right thing to do is. But skipping the element seems at least
     * _reasonable_ when the requested taxon and the sequence's taxon disagree.
     */
    boolean taxonMismatch = taxon != null && !sequence.getTaxon().equals(taxon);
    if (taxonMismatch) {
        return null;
    }

    // Fall back to a database for the sequence's own taxon when the caller did not supply one.
    GoldenPathSequenceAnalysis db = goldenPathDb != null
            ? goldenPathDb
            : new GoldenPathSequenceAnalysis(sequence.getTaxon());

    final Collection<BlatResult> hits = blatResultService.findByBioSequence(sequence);
    ProbeMapUtils.removeDuplicates(hits);
    if (hits.isEmpty()) {
        return null;
    }

    return probeMapper.processBlatResults(db, hits, config);
}
Aggregations