use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.
the class BlatResultDaoImpl method findByBioSequence.
/**
 * Looks up the BLAT results associated with a sequence.
 * <p>
 * A criteria query over {@code BlatResult} is restricted through
 * {@code BusinessKey.attachCriteria} on the {@code querySequence} property. Before returning,
 * the query sequence of every hit (and its target chromosome, when one is set) is passed to
 * {@code Hibernate.initialize}.
 *
 * @param bioSequence the sequence to search by; validated with {@code BusinessKey.checkValidKey}
 * @return the list produced by the criteria query, viewed as a collection of BlatResult
 */
@SuppressWarnings("unchecked")
@Override
public Collection<BlatResult> findByBioSequence(BioSequence bioSequence) {
    BusinessKey.checkValidKey(bioSequence);

    Criteria criteria = this.getSessionFactory().getCurrentSession().createCriteria(BlatResult.class);
    BusinessKey.attachCriteria(criteria, bioSequence, "querySequence");

    List<?> hits = criteria.list();
    if (hits == null) {
        // Nothing to initialize; hand the null straight back.
        return null;
    }

    for (Object hit : hits) {
        BlatResult blatResult = (BlatResult) hit;

        // The target chromosome is optional, the query sequence is always initialized.
        Object targetChromosome = blatResult.getTargetChromosome();
        if (targetChromosome != null) {
            Hibernate.initialize(targetChromosome);
        }
        Hibernate.initialize(blatResult.getQuerySequence());
    }

    return (Collection<BlatResult>) hits;
}
use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.
the class ArrayDesignProbeMapperServiceImpl method processCompositeSequence.
/**
 * Runs the probe mapper over the stored BLAT results of one composite sequence.
 *
 * @param config            settings forwarded to {@code probeMapper.processBlatResults}
 * @param taxon             when non-null, the sequence's taxon must equal it or nothing is processed
 * @param goldenPathDb      database to use; when null a new one is created for the sequence's taxon
 * @param compositeSequence the probe whose biological characteristic is looked up
 * @return the result of {@code probeMapper.processBlatResults}, or {@code null} when the probe has no
 *         biological characteristic, the taxon does not match, or no BLAT results remain after
 *         duplicate removal
 */
@Override
@Transactional
public Map<String, Collection<BlatAssociation>> processCompositeSequence(ProbeMapperConfig config, Taxon taxon, GoldenPathSequenceAnalysis goldenPathDb, CompositeSequence compositeSequence) {
    BioSequence sequence = compositeSequence.getBiologicalCharacteristic();
    if (sequence == null) {
        return null;
    }

    /*
     * It isn't 100% clear what the right thing to do is. But skipping the probe seems at least _reasonable_
     * when the requested taxon and the sequence's taxon disagree.
     */
    boolean taxonMismatch = taxon != null && !sequence.getTaxon().equals(taxon);
    if (taxonMismatch) {
        return null;
    }

    // Fall back to a database for the sequence's own taxon when the caller did not supply one.
    GoldenPathSequenceAnalysis db = goldenPathDb != null
            ? goldenPathDb
            : new GoldenPathSequenceAnalysis(sequence.getTaxon());

    final Collection<BlatResult> hits = blatResultService.findByBioSequence(sequence);
    ProbeMapUtils.removeDuplicates(hits);

    return hits.isEmpty() ? null : probeMapper.processBlatResults(db, hits, config);
}
use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.
the class ArrayDesignSequenceAlignmentServiceImpl method persistBlatResults.
/**
 * Prepares a batch of BLAT results and passes the batch to the persister.
 * <p>
 * For the first result seen with each {@code ProbeMapUtils.hashBlatResult} value, the taxon of the query
 * sequence is copied onto the target chromosome and onto the chromosome's sequence, and, when the result
 * has no target aligned region yet, one is built from the target start, end and strand.
 *
 * @param brs results, assumed to be from alignments to the genome for the array design (that is, we don't
 *            consider aligning mouse to human)
 * @return the value returned by {@code persisterHelper.persist} for {@code brs}
 */
@SuppressWarnings("unchecked")
private Collection<BlatResult> persistBlatResults(Collection<BlatResult> brs) {
    Collection<Integer> seen = new HashSet<>();
    int duplicates = 0;
    for (BlatResult br : brs) {
        // add() returns false when this hash was already recorded, i.e. the hit is a duplicate.
        if (!seen.add(ProbeMapUtils.hashBlatResult(br))) {
            duplicates++;
            continue;
        }

        assert br.getQuerySequence() != null;
        assert br.getQuerySequence().getName() != null;
        Taxon taxon = br.getQuerySequence().getTaxon();
        assert taxon != null;

        try {
            // Written reflectively with forced access instead of through a setter.
            FieldUtils.writeField(br.getTargetChromosome(), "taxon", taxon, true);
        } catch (IllegalAccessException e) {
            // Keep going as before, but report through the class logger rather than stderr.
            ArrayDesignSequenceAlignmentServiceImpl.log.error("Could not set the taxon on the target chromosome of a BLAT result", e);
        }
        br.getTargetChromosome().getSequence().setTaxon(taxon);

        PhysicalLocation pl = br.getTargetAlignedRegion();
        if (pl == null) {
            pl = PhysicalLocation.Factory.newInstance();
            pl.setChromosome(br.getTargetChromosome());
            pl.setNucleotide(br.getTargetStart());
            assert br.getTargetEnd() != null && br.getTargetStart() != null;
            pl.setNucleotideLength(br.getTargetEnd().intValue() - br.getTargetStart().intValue());
            pl.setStrand(br.getStrand());
            br.setTargetAlignedRegion(pl);
            pl.setBin(SequenceBinUtils.binFromRange(br.getTargetStart().intValue(), br.getTargetEnd().intValue()));
        }
    }

    if (duplicates > 0) {
        ArrayDesignSequenceAlignmentServiceImpl.log.info(duplicates + " duplicate BLAT hits skipped");
    }

    // NOTE(review): duplicates are skipped by the preparation loop above but are still contained in brs,
    // so they are still handed to the persister here - confirm whether that is intended.
    return (Collection<BlatResult>) persisterHelper.persist(brs);
}
use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.
the class ArrayDesignSequenceAlignmentServiceImpl method processArrayDesign.
/**
 * Aligns the sequences of an array design, taxon by taxon, and persists the resulting BLAT hits.
 * <p>
 * Existing alignment data for the design is deleted once, during the first taxon, after that taxon's
 * alignments have been obtained. When all taxa are done the array design report is regenerated.
 *
 * @param ad        the array design to process
 * @param sensitive forwarded to {@code getAlignments}; also logged when true
 * @param blat      the aligner to use; a {@code ShellDelegatingBlat} is created when null
 * @return every result returned by {@code persistBlatResults}, collected in a set
 * @throws IllegalStateException if two aligned query sequences share the same name
 */
private Collection<BlatResult> processArrayDesign(ArrayDesign ad, boolean sensitive, Blat blat) {
    Blat aligner = (blat != null) ? blat : new ShellDelegatingBlat();

    if (sensitive) {
        ArrayDesignSequenceAlignmentServiceImpl.log.info("Running in 'sensitive' mode if possible");
    }

    Collection<BlatResult> persisted = new HashSet<>();
    boolean oldAlignmentsRemoved = false;

    for (Taxon taxon : arrayDesignService.getTaxa(ad.getId())) {
        Collection<BioSequence> queries = ArrayDesignSequenceAlignmentServiceImpl.getSequences(ad, taxon);
        Map<BioSequence, Collection<BlatResult>> alignments = this.getAlignments(queries, sensitive, taxon, aligner);
        ArrayDesignSequenceAlignmentServiceImpl.log.info("Got BLAT results for " + alignments.size() + " query sequences");

        // Index the aligned sequences by name; names must be unique for the lookup below to be valid.
        Map<String, BioSequence> alignedByName = new HashMap<>();
        for (BioSequence aligned : alignments.keySet()) {
            String name = aligned.getName();
            if (alignedByName.containsKey(name)) {
                throw new IllegalStateException("All distinct sequences on the array must have unique names; found " + name + " more than once.");
            }
            alignedByName.put(name, aligned);
        }

        // Old results are only removed once we hold at least one set of fresh blat results.
        if (!oldAlignmentsRemoved) {
            ArrayDesignSequenceAlignmentServiceImpl.log.info("Looking for old results to remove...");
            arrayDesignService.deleteAlignmentData(ad);
            oldAlignmentsRemoved = true;
        }

        int withoutHits = 0;
        int processed = 0;
        for (BioSequence sequence : queries) {
            if (sequence == null) {
                ArrayDesignSequenceAlignmentServiceImpl.log.warn("Null sequence!");
                continue;
            }

            BioSequence alignedKey = alignedByName.get(sequence.getName());
            Collection<BlatResult> hits = alignments.get(alignedKey);
            if (hits == null) {
                withoutHits++;
                continue;
            }

            // Point each hit at the real sequence, replacing the placeholder instance it came with.
            for (BlatResult hit : hits) {
                hit.setQuerySequence(sequence);
            }
            persisted.addAll(this.persistBlatResults(hits));

            processed++;
            if (processed % 2000 == 0) {
                ArrayDesignSequenceAlignmentServiceImpl.log.info("Checked results for " + processed + " queries, " + persisted.size() + " blat results so far.");
            }
        }

        ArrayDesignSequenceAlignmentServiceImpl.log.info(withoutHits + "/" + queries.size() + " sequences had no blat results");
    }

    arrayDesignReportService.generateArrayDesignReport(ad.getId());
    return persisted;
}
use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.
the class MockBlat method blatQuery.
/**
 * Fabricates a single BLAT hit for the given query instead of running a real alignment.
 * <p>
 * The hit targets a freshly created chromosome named "XXX" whose sequence length is set to 1e7, starts at
 * a random position on it, spans the full query length, and reports one mismatch and no gaps.
 *
 * @param b the query sequence; its length is read, so it must be set
 * @return a set containing exactly one fabricated result
 */
@Override
public Collection<BlatResult> blatQuery(BioSequence b) {
    Collection<BlatResult> result = new HashSet<>();

    // Fake chromosome for the hit to land on.
    BioSequence chromseq = PersistentDummyObjectHelper.getTestNonPersistentBioSequence(taxon);
    chromseq.setLength((long) 1e7);
    BlatResult br = BlatResult.Factory.newInstance();
    Chromosome chromosome = new Chromosome("XXX", null, chromseq, taxon);
    br.setTargetChromosome(chromosome);
    assert br.getTargetChromosome().getSequence() != null;

    // Random start within the chromosome; the end is start + query length.
    long targetStart = MockBlat.RANDOM.nextInt(chromseq.getLength().intValue());
    br.setQuerySequence(b);
    br.setTargetStart(targetStart);
    br.setTargetEnd(targetStart + b.getLength());

    // All but one base match; no repeats and no gaps on either side.
    br.setMatches((int) (b.getLength() - 1));
    br.setMismatches(1);
    br.setRepMatches(0);
    br.setQueryGapCount(0);
    br.setQueryGapBases(0);
    br.setQueryStart(0);
    br.setQueryEnd(b.getLength().intValue());
    br.setTargetGapBases(0);
    br.setTargetGapCount(0);

    // NOTE(review): a PhysicalLocation used to be built here but was never attached to the result or
    // used in any other way, so it has been dropped as dead code. The returned hit therefore has no
    // target aligned region, exactly as before; set one explicitly if a caller needs it.

    result.add(br);
    return result;
}
Aggregations