Search in sources :

Example 6 with BlatResult

use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.

the class ProbeMapperImpl method processSequences.

@Override
public Map<String, Collection<BlatAssociation>> processSequences(GoldenPathSequenceAnalysis goldenpath, Collection<BioSequence> sequences, ProbeMapperConfig config) {
    Blat b = new ShellDelegatingBlat();
    b.setBlatScoreThreshold(config.getBlatScoreThreshold());
    try {
        Map<BioSequence, Collection<BlatResult>> results = b.blatQuery(sequences, goldenpath.getTaxon());
        Collection<BlatResult> blatRes = new HashSet<>();
        for (Collection<BlatResult> coll : results.values()) {
            blatRes.addAll(coll);
        }
        return this.processBlatResults(goldenpath, blatRes);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used : Blat(ubic.gemma.core.apps.Blat) ShellDelegatingBlat(ubic.gemma.core.apps.ShellDelegatingBlat) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ShellDelegatingBlat(ubic.gemma.core.apps.ShellDelegatingBlat) Collection(java.util.Collection) IOException(java.io.IOException) BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult) HashSet(java.util.HashSet)

Example 7 with BlatResult

use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.

the class ArrayDesignSequenceAlignmentServiceImpl method processArrayDesign.

@Override
public Collection<BlatResult> processArrayDesign(ArrayDesign ad, Taxon taxon, Collection<BlatResult> rawBlatResults) {
    ArrayDesignSequenceAlignmentServiceImpl.log.info("Looking for old results to remove...");
    ad = arrayDesignService.thaw(ad);
    arrayDesignService.deleteAlignmentData(ad);
    // Blat file processing can only be run on one taxon at a time
    taxon = this.validateTaxaForBlatFile(ad, taxon);
    Collection<BioSequence> sequencesToBlat = ArrayDesignSequenceAlignmentServiceImpl.getSequences(ad);
    sequencesToBlat = bioSequenceService.thaw(sequencesToBlat);
    // if the blat results were loaded from a file, we have to replace the
    // query sequences with the actual ones
    // attached to the array design. We have to do this by name because the
    // sequence name is what the files contain.
    // Note that if there is ambiguity there will be problems!
    Map<String, BioSequence> seqMap = new HashMap<>();
    for (BioSequence bioSequence : sequencesToBlat) {
        seqMap.put(bioSequence.getName(), bioSequence);
    }
    ExternalDatabase searchedDatabase = ShellDelegatingBlat.getSearchedGenome(taxon);
    Collection<BlatResult> toSkip = new HashSet<>();
    for (BlatResult result : rawBlatResults) {
        /*
             * If the sequences don't have ids, replace them with the actual sequences associated with the array design.
             */
        if (result.getQuerySequence().getId() == null) {
            String querySeqName = result.getQuerySequence().getName();
            BioSequence actualSequence = seqMap.get(querySeqName);
            if (actualSequence == null) {
                ArrayDesignSequenceAlignmentServiceImpl.log.debug("Array design does not contain a sequence with name " + querySeqName);
                toSkip.add(result);
                continue;
            }
            result.setQuerySequence(actualSequence);
        } else {
            result.getQuerySequence().setTaxon(taxon);
        }
        result.setSearchedDatabase(searchedDatabase);
        try {
            FieldUtils.writeField(result.getTargetChromosome(), "taxon", taxon, true);
        } catch (IllegalAccessException e) {
            e.printStackTrace();
        }
        result.getTargetChromosome().getSequence().setTaxon(taxon);
    }
    if (toSkip.size() > 0) {
        ArrayDesignSequenceAlignmentServiceImpl.log.warn(toSkip.size() + " blat results were for sequences not on " + ad + "; they will be ignored.");
        rawBlatResults.removeAll(toSkip);
    }
    Map<BioSequence, Collection<BlatResult>> goldenPathAlignments = new HashMap<>();
    this.getGoldenPathAlignments(sequencesToBlat, taxon, goldenPathAlignments);
    for (BioSequence sequence : goldenPathAlignments.keySet()) {
        rawBlatResults.addAll(goldenPathAlignments.get(sequence));
    }
    Collection<BlatResult> results = this.persistBlatResults(rawBlatResults);
    arrayDesignReportService.generateArrayDesignReport(ad.getId());
    return results;
}
Also used : ExternalDatabase(ubic.gemma.model.common.description.ExternalDatabase) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) HashMap(java.util.HashMap) Collection(java.util.Collection) BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult) HashSet(java.util.HashSet)

Example 8 with BlatResult

use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.

the class ArrayDesignSequenceAlignmentServiceImpl method getGoldenPathAlignments.

/**
 * Check if there are alignment results in the goldenpath database, in which case we do not reanalyze the sequences.
 *
 * @param sequencesToBlat The full set of sequences that need analysis.
 * @param results         Will be stored here.
 * @return the sequences which ARE NOT found in goldenpath and which therefore DO need blat.
 */
private Collection<BioSequence> getGoldenPathAlignments(Collection<BioSequence> sequencesToBlat, Taxon taxon, Map<BioSequence, Collection<BlatResult>> results) {
    GoldenPathQuery gpq = new GoldenPathQuery(taxon);
    Collection<BioSequence> needBlat = new HashSet<>();
    int count = 0;
    int totalFound = 0;
    for (BioSequence sequence : sequencesToBlat) {
        boolean found = false;
        if (sequence.getSequenceDatabaseEntry() != null) {
            Collection<BlatResult> brs = gpq.findAlignments(sequence.getSequenceDatabaseEntry().getAccession());
            if (brs != null && brs.size() > 0) {
                for (BlatResult result : brs) {
                    this.copyLengthInformation(sequence, result);
                    result.setQuerySequence(sequence);
                }
                results.put(sequence, brs);
                found = true;
                totalFound++;
            }
        }
        if (++count % 1000 == 0 && totalFound > 0) {
            ArrayDesignSequenceAlignmentServiceImpl.log.info("Alignments in Golden Path database for " + totalFound + "/" + count + " checked so far.");
        }
        if (!found) {
            needBlat.add(sequence);
        }
    }
    if (totalFound > 0) {
        ArrayDesignSequenceAlignmentServiceImpl.log.info("Found " + totalFound + "/" + count + " alignments in Golden Path database");
    }
    return needBlat;
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) GoldenPathQuery(ubic.gemma.core.externalDb.GoldenPathQuery) HashSet(java.util.HashSet) BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult)

Example 9 with BlatResult

use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.

the class CompositeSequenceDaoImpl method batchGetGenesWithSpecificity.

/**
 * @param batch   of composite sequences to process
 * @param results - adding to this
 */
private void batchGetGenesWithSpecificity(Collection<CompositeSequence> batch, Map<CompositeSequence, Collection<BioSequence2GeneProduct>> results) {
    if (batch.size() == 0) {
        return;
    }
    // language=HQL
    final String queryString = "select cs,bas from CompositeSequence cs, BioSequence2GeneProduct bas inner join cs.biologicalCharacteristic bs " + "inner join fetch bas.geneProduct gp inner join fetch gp.gene gene " + "where bas.bioSequence=bs and cs in (:cs)";
    List<?> qr = this.getHibernateTemplate().findByNamedParam(queryString, "cs", batch);
    for (Object o : qr) {
        Object[] oa = (Object[]) o;
        CompositeSequence csa = (CompositeSequence) oa[0];
        BioSequence2GeneProduct ba = (BioSequence2GeneProduct) oa[1];
        if (ba instanceof BlatAssociation) {
            BlatResult blatResult = ((BlatAssociation) ba).getBlatResult();
            PhysicalLocation pl = blatResult.getTargetAlignedRegion();
            /*
                 * We didn't always used to fill in the targetAlignedRegion ... this is just in case.
                 */
            if (pl == null) {
                pl = PhysicalLocation.Factory.newInstance();
                pl.setChromosome(blatResult.getTargetChromosome());
                pl.setNucleotide(blatResult.getTargetStart());
                pl.setNucleotideLength(blatResult.getTargetEnd().intValue() - blatResult.getTargetStart().intValue());
                pl.setStrand(blatResult.getStrand());
            // Note: not bothering to fill in the bin.
            }
        }
        if (!results.containsKey(csa)) {
            results.put(csa, new HashSet<BioSequence2GeneProduct>());
        }
        results.get(csa).add(ba);
    }
    /*
         * This is kind of important. We ensure we return an empty map for probes that do not have a mapping.
         */
    for (CompositeSequence cs : batch) {
        if (!results.containsKey(cs)) {
            results.put(cs, new HashSet<BioSequence2GeneProduct>());
        }
    }
}
Also used : BioSequence2GeneProduct(ubic.gemma.model.association.BioSequence2GeneProduct) CompositeSequenceValueObject(ubic.gemma.model.expression.designElement.CompositeSequenceValueObject) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult) PhysicalLocation(ubic.gemma.model.genome.PhysicalLocation)

Example 10 with BlatResult

use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.

the class ArrayDesignDaoImpl method deleteAlignmentData.

@Override
public void deleteAlignmentData(ArrayDesign arrayDesign) {
    // First have to remove all blatAssociations, because they are referred to by the alignments
    this.deleteGeneProductAssociations(arrayDesign);
    // Note attempts to do this with bulk updates were unsuccessful due to the need for joins.
    // language=HQL
    final String queryString = "select br from ArrayDesign ad join ad.compositeSequences as cs " + "inner join cs.biologicalCharacteristic bs, BlatResult br " + "where br.querySequence = bs and ad=:arrayDesign";
    // noinspection unchecked
    List<BlatResult> toDelete = this.getSessionFactory().getCurrentSession().createQuery(queryString).setParameter("arrayDesign", arrayDesign).list();
    AbstractDao.log.info("Deleting " + toDelete + " alignments for sequences on " + arrayDesign + " (will affect other designs that use any of the same sequences)");
    for (BlatResult r : toDelete) {
        this.getSessionFactory().getCurrentSession().delete(r);
    }
}
Also used : BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult)

Aggregations

BlatResult (ubic.gemma.model.genome.sequenceAnalysis.BlatResult)33 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)12 HashSet (java.util.HashSet)10 Collection (java.util.Collection)9 Taxon (ubic.gemma.model.genome.Taxon)6 Chromosome (ubic.gemma.model.genome.Chromosome)5 BlatAssociation (ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation)5 HashMap (java.util.HashMap)4 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)4 PhysicalLocation (ubic.gemma.model.genome.PhysicalLocation)4 IOException (java.io.IOException)3 InputStream (java.io.InputStream)3 Blat (ubic.gemma.core.apps.Blat)3 ShellDelegatingBlat (ubic.gemma.core.apps.ShellDelegatingBlat)3 ExternalDatabase (ubic.gemma.model.common.description.ExternalDatabase)3 Test (org.junit.Test)2 BioSequence2GeneProduct (ubic.gemma.model.association.BioSequence2GeneProduct)2 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)2 ArrayList (java.util.ArrayList)1 Date (java.util.Date)1