use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.
the class ProbeMapperImpl method processSequences.
@Override
public Map<String, Collection<BlatAssociation>> processSequences(GoldenPathSequenceAnalysis goldenpath, Collection<BioSequence> sequences, ProbeMapperConfig config) {
Blat b = new ShellDelegatingBlat();
b.setBlatScoreThreshold(config.getBlatScoreThreshold());
try {
Map<BioSequence, Collection<BlatResult>> results = b.blatQuery(sequences, goldenpath.getTaxon());
Collection<BlatResult> blatRes = new HashSet<>();
for (Collection<BlatResult> coll : results.values()) {
blatRes.addAll(coll);
}
return this.processBlatResults(goldenpath, blatRes);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.
the class ArrayDesignSequenceAlignmentServiceImpl method processArrayDesign.
@Override
public Collection<BlatResult> processArrayDesign(ArrayDesign ad, Taxon taxon, Collection<BlatResult> rawBlatResults) {
ArrayDesignSequenceAlignmentServiceImpl.log.info("Looking for old results to remove...");
ad = arrayDesignService.thaw(ad);
arrayDesignService.deleteAlignmentData(ad);
// Blat file processing can only be run on one taxon at a time
taxon = this.validateTaxaForBlatFile(ad, taxon);
Collection<BioSequence> sequencesToBlat = ArrayDesignSequenceAlignmentServiceImpl.getSequences(ad);
sequencesToBlat = bioSequenceService.thaw(sequencesToBlat);
// if the blat results were loaded from a file, we have to replace the
// query sequences with the actual ones
// attached to the array design. We have to do this by name because the
// sequence name is what the files contain.
// Note that if there is ambiguity there will be problems!
Map<String, BioSequence> seqMap = new HashMap<>();
for (BioSequence bioSequence : sequencesToBlat) {
seqMap.put(bioSequence.getName(), bioSequence);
}
ExternalDatabase searchedDatabase = ShellDelegatingBlat.getSearchedGenome(taxon);
Collection<BlatResult> toSkip = new HashSet<>();
for (BlatResult result : rawBlatResults) {
/*
* If the sequences don't have ids, replace them with the actual sequences associated with the array design.
*/
if (result.getQuerySequence().getId() == null) {
String querySeqName = result.getQuerySequence().getName();
BioSequence actualSequence = seqMap.get(querySeqName);
if (actualSequence == null) {
ArrayDesignSequenceAlignmentServiceImpl.log.debug("Array design does not contain a sequence with name " + querySeqName);
toSkip.add(result);
continue;
}
result.setQuerySequence(actualSequence);
} else {
result.getQuerySequence().setTaxon(taxon);
}
result.setSearchedDatabase(searchedDatabase);
try {
FieldUtils.writeField(result.getTargetChromosome(), "taxon", taxon, true);
} catch (IllegalAccessException e) {
e.printStackTrace();
}
result.getTargetChromosome().getSequence().setTaxon(taxon);
}
if (toSkip.size() > 0) {
ArrayDesignSequenceAlignmentServiceImpl.log.warn(toSkip.size() + " blat results were for sequences not on " + ad + "; they will be ignored.");
rawBlatResults.removeAll(toSkip);
}
Map<BioSequence, Collection<BlatResult>> goldenPathAlignments = new HashMap<>();
this.getGoldenPathAlignments(sequencesToBlat, taxon, goldenPathAlignments);
for (BioSequence sequence : goldenPathAlignments.keySet()) {
rawBlatResults.addAll(goldenPathAlignments.get(sequence));
}
Collection<BlatResult> results = this.persistBlatResults(rawBlatResults);
arrayDesignReportService.generateArrayDesignReport(ad.getId());
return results;
}
use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.
the class ArrayDesignSequenceAlignmentServiceImpl method getGoldenPathAlignments.
/**
* Check if there are alignment results in the goldenpath database, in which case we do not reanalyze the sequences.
*
* @param sequencesToBlat The full set of sequences that need analysis.
* @param results Will be stored here.
* @return the sequences which ARE NOT found in goldenpath and which therefore DO need blat.
*/
private Collection<BioSequence> getGoldenPathAlignments(Collection<BioSequence> sequencesToBlat, Taxon taxon, Map<BioSequence, Collection<BlatResult>> results) {
GoldenPathQuery gpq = new GoldenPathQuery(taxon);
Collection<BioSequence> needBlat = new HashSet<>();
int count = 0;
int totalFound = 0;
for (BioSequence sequence : sequencesToBlat) {
boolean found = false;
if (sequence.getSequenceDatabaseEntry() != null) {
Collection<BlatResult> brs = gpq.findAlignments(sequence.getSequenceDatabaseEntry().getAccession());
if (brs != null && brs.size() > 0) {
for (BlatResult result : brs) {
this.copyLengthInformation(sequence, result);
result.setQuerySequence(sequence);
}
results.put(sequence, brs);
found = true;
totalFound++;
}
}
if (++count % 1000 == 0 && totalFound > 0) {
ArrayDesignSequenceAlignmentServiceImpl.log.info("Alignments in Golden Path database for " + totalFound + "/" + count + " checked so far.");
}
if (!found) {
needBlat.add(sequence);
}
}
if (totalFound > 0) {
ArrayDesignSequenceAlignmentServiceImpl.log.info("Found " + totalFound + "/" + count + " alignments in Golden Path database");
}
return needBlat;
}
use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.
the class CompositeSequenceDaoImpl method batchGetGenesWithSpecificity.
/**
* @param batch of composite sequences to process
* @param results - adding to this
*/
private void batchGetGenesWithSpecificity(Collection<CompositeSequence> batch, Map<CompositeSequence, Collection<BioSequence2GeneProduct>> results) {
if (batch.size() == 0) {
return;
}
// language=HQL
final String queryString = "select cs,bas from CompositeSequence cs, BioSequence2GeneProduct bas inner join cs.biologicalCharacteristic bs " + "inner join fetch bas.geneProduct gp inner join fetch gp.gene gene " + "where bas.bioSequence=bs and cs in (:cs)";
List<?> qr = this.getHibernateTemplate().findByNamedParam(queryString, "cs", batch);
for (Object o : qr) {
Object[] oa = (Object[]) o;
CompositeSequence csa = (CompositeSequence) oa[0];
BioSequence2GeneProduct ba = (BioSequence2GeneProduct) oa[1];
if (ba instanceof BlatAssociation) {
BlatResult blatResult = ((BlatAssociation) ba).getBlatResult();
PhysicalLocation pl = blatResult.getTargetAlignedRegion();
/*
* We didn't always used to fill in the targetAlignedRegion ... this is just in case.
*/
if (pl == null) {
pl = PhysicalLocation.Factory.newInstance();
pl.setChromosome(blatResult.getTargetChromosome());
pl.setNucleotide(blatResult.getTargetStart());
pl.setNucleotideLength(blatResult.getTargetEnd().intValue() - blatResult.getTargetStart().intValue());
pl.setStrand(blatResult.getStrand());
// Note: not bothering to fill in the bin.
}
}
if (!results.containsKey(csa)) {
results.put(csa, new HashSet<BioSequence2GeneProduct>());
}
results.get(csa).add(ba);
}
/*
* This is kind of important. We ensure we return an empty map for probes that do not have a mapping.
*/
for (CompositeSequence cs : batch) {
if (!results.containsKey(cs)) {
results.put(cs, new HashSet<BioSequence2GeneProduct>());
}
}
}
use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.
the class ArrayDesignDaoImpl method deleteAlignmentData.
@Override
public void deleteAlignmentData(ArrayDesign arrayDesign) {
// First have to remove all blatAssociations, because they are referred to by the alignments
this.deleteGeneProductAssociations(arrayDesign);
// Note attempts to do this with bulk updates were unsuccessful due to the need for joins.
// language=HQL
final String queryString = "select br from ArrayDesign ad join ad.compositeSequences as cs " + "inner join cs.biologicalCharacteristic bs, BlatResult br " + "where br.querySequence = bs and ad=:arrayDesign";
// noinspection unchecked
List<BlatResult> toDelete = this.getSessionFactory().getCurrentSession().createQuery(queryString).setParameter("arrayDesign", arrayDesign).list();
AbstractDao.log.info("Deleting " + toDelete + " alignments for sequences on " + arrayDesign + " (will affect other designs that use any of the same sequences)");
for (BlatResult r : toDelete) {
this.getSessionFactory().getCurrentSession().delete(r);
}
}
Aggregations