Search in sources :

Example 21 with BlatResult

use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.

the class BlatResultDaoImpl method findByBioSequence.

/**
 * Finds the BLAT results associated with the given sequence.
 * <p>
 * The sequence is first validated with {@code BusinessKey.checkValidKey}; the query is a Hibernate
 * {@link Criteria} on {@link BlatResult} restricted through {@code BusinessKey.attachCriteria} on the
 * {@code querySequence} property. For every hit the target chromosome (when set) and the query sequence are
 * initialized via {@code Hibernate.initialize} - presumably so they remain usable once the session is gone.
 *
 * @param bioSequence the sequence to look up
 * @return whatever the criteria query listed, viewed as a collection of BLAT results
 */
@SuppressWarnings("unchecked")
@Override
public Collection<BlatResult> findByBioSequence(BioSequence bioSequence) {
    BusinessKey.checkValidKey(bioSequence);

    Criteria criteria = this.getSessionFactory().getCurrentSession().createCriteria(BlatResult.class);
    BusinessKey.attachCriteria(criteria, bioSequence, "querySequence");

    List<?> hits = criteria.list();
    if (hits == null) {
        // Nothing to initialize; hand the null straight back, as before.
        return null;
    }

    for (Object hit : hits) {
        BlatResult blatResult = (BlatResult) hit;
        // The target chromosome is optional, the query sequence is always initialized.
        if (blatResult.getTargetChromosome() != null) {
            Hibernate.initialize(blatResult.getTargetChromosome());
        }
        Hibernate.initialize(blatResult.getQuerySequence());
    }
    return (Collection<BlatResult>) hits;
}
Also used : Collection(java.util.Collection) BlatResultValueObject(ubic.gemma.model.genome.sequenceAnalysis.BlatResultValueObject) Criteria(org.hibernate.Criteria) BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult)

Example 22 with BlatResult

use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.

the class ArrayDesignProbeMapperServiceImpl method processCompositeSequence.

/**
 * Maps a single composite sequence using the BLAT results stored for its biological characteristic.
 *
 * @param config            configuration handed on to the probe mapper
 * @param taxon             if non-null, the sequence is only processed when its taxon equals this one
 * @param goldenPathDb      database to map against; when null a new {@link GoldenPathSequenceAnalysis} is
 *                          created for the sequence's own taxon
 * @param compositeSequence the element to process
 * @return the result of {@code probeMapper.processBlatResults}, or null when the element has no sequence, the
 *         taxon does not match, or no BLAT results remain after duplicate removal
 */
@Override
@Transactional
public Map<String, Collection<BlatAssociation>> processCompositeSequence(ProbeMapperConfig config, Taxon taxon, GoldenPathSequenceAnalysis goldenPathDb, CompositeSequence compositeSequence) {
    BioSequence sequence = compositeSequence.getBiologicalCharacteristic();
    if (sequence == null) {
        return null;
    }

    /*
     * It isn't 100% clear what the right thing to do is. But skipping the sequence seems at least _reasonable_
     * when the requested taxon and the sequence's taxon disagree.
     */
    boolean taxonMismatch = taxon != null && !sequence.getTaxon().equals(taxon);
    if (taxonMismatch) {
        return null;
    }

    // Fall back to a database for the sequence's own taxon when the caller did not supply one.
    GoldenPathSequenceAnalysis db = goldenPathDb != null ? goldenPathDb : new GoldenPathSequenceAnalysis(sequence.getTaxon());

    final Collection<BlatResult> hits = blatResultService.findByBioSequence(sequence);
    ProbeMapUtils.removeDuplicates(hits);

    return hits.isEmpty() ? null : probeMapper.processBlatResults(db, hits, config);
}
Also used : GoldenPathSequenceAnalysis(ubic.gemma.core.externalDb.GoldenPathSequenceAnalysis) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult) Transactional(org.springframework.transaction.annotation.Transactional)

Example 23 with BlatResult

use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.

the class ArrayDesignSequenceAlignmentServiceImpl method persistBlatResults.

/**
 * Prepares BLAT results for storage and persists them.
 * <p>
 * For each result not seen before (judged by {@code ProbeMapUtils.hashBlatResult}), the taxon of the query
 * sequence is copied onto the target chromosome and onto the chromosome's sequence, and a target aligned region is
 * built from the target start/end, strand and bin when the result does not have one yet.
 *
 * @param brs the BLAT results to persist; assumed to be from alignments to the genome for the array design (that
 *            is, we don't consider aligning mouse to human)
 * @return the collection returned by {@code persisterHelper.persist} for {@code brs}
 */
@SuppressWarnings("unchecked")
private Collection<BlatResult> persistBlatResults(Collection<BlatResult> brs) {
    Collection<Integer> seen = new HashSet<>();
    int duplicates = 0;
    for (BlatResult br : brs) {
        // add() reports false when the hash is already present, so one call both tests and records it.
        if (!seen.add(ProbeMapUtils.hashBlatResult(br))) {
            duplicates++;
            continue;
        }
        assert br.getQuerySequence() != null;
        assert br.getQuerySequence().getName() != null;
        Taxon taxon = br.getQuerySequence().getTaxon();
        assert taxon != null;
        try {
            // Written reflectively with forced access - presumably the chromosome has no accessible taxon setter.
            FieldUtils.writeField(br.getTargetChromosome(), "taxon", taxon, true);
        } catch (IllegalAccessException e) {
            // Best effort: report through the class logger (not stderr) and carry on with this result.
            ArrayDesignSequenceAlignmentServiceImpl.log.error("Could not set the taxon on the target chromosome for query sequence " + br.getQuerySequence().getName(), e);
        }
        br.getTargetChromosome().getSequence().setTaxon(taxon);
        PhysicalLocation pl = br.getTargetAlignedRegion();
        if (pl == null) {
            // No aligned region yet: derive one from the target coordinates of the result.
            pl = PhysicalLocation.Factory.newInstance();
            pl.setChromosome(br.getTargetChromosome());
            pl.setNucleotide(br.getTargetStart());
            assert br.getTargetEnd() != null && br.getTargetStart() != null;
            pl.setNucleotideLength(br.getTargetEnd().intValue() - br.getTargetStart().intValue());
            pl.setStrand(br.getStrand());
            br.setTargetAlignedRegion(pl);
            pl.setBin(SequenceBinUtils.binFromRange(br.getTargetStart().intValue(), br.getTargetEnd().intValue()));
        }
    }
    if (duplicates > 0) {
        ArrayDesignSequenceAlignmentServiceImpl.log.info(duplicates + " duplicate BLAT hits skipped");
    }
    // NOTE(review): duplicates are only skipped by the preparation loop above; the complete input collection is
    // still handed to the persister here - confirm that this is intended.
    return (Collection<BlatResult>) persisterHelper.persist(brs);
}
Also used : Taxon(ubic.gemma.model.genome.Taxon) Collection(java.util.Collection) HashSet(java.util.HashSet) BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult) PhysicalLocation(ubic.gemma.model.genome.PhysicalLocation)

Example 24 with BlatResult

use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.

the class ArrayDesignSequenceAlignmentServiceImpl method processArrayDesign.

/**
 * Runs BLAT for the sequences of an array design, taxon by taxon, and persists the results.
 * <p>
 * Existing alignment data for the design is deleted once, after the first batch of alignments has been obtained.
 * When all taxa are done, the array design report is regenerated.
 *
 * @param ad        the array design to process
 * @param sensitive passed on to {@code getAlignments}; only announced in the log here
 * @param blat      the BLAT implementation to use, or null to use a new {@link ShellDelegatingBlat}
 * @return all persisted BLAT results, across taxa
 * @throws IllegalStateException if two of the aligned sequences share a name
 */
private Collection<BlatResult> processArrayDesign(ArrayDesign ad, boolean sensitive, Blat blat) {
    Blat aligner = (blat != null) ? blat : new ShellDelegatingBlat();
    Collection<BlatResult> persisted = new HashSet<>();

    if (sensitive) {
        ArrayDesignSequenceAlignmentServiceImpl.log.info("Running in 'sensitive' mode if possible");
    }

    boolean oldAlignmentsRemoved = false;
    for (Taxon taxon : arrayDesignService.getTaxa(ad.getId())) {
        Collection<BioSequence> queries = ArrayDesignSequenceAlignmentServiceImpl.getSequences(ad, taxon);
        Map<BioSequence, Collection<BlatResult>> alignments = this.getAlignments(queries, sensitive, taxon, aligner);
        ArrayDesignSequenceAlignmentServiceImpl.log.info("Got BLAT results for " + alignments.keySet().size() + " query sequences");

        // Index the aligned sequences by name; the results are matched back to the queries by name below.
        Map<String, BioSequence> alignedByName = new HashMap<>();
        for (BioSequence aligned : alignments.keySet()) {
            String name = aligned.getName();
            if (alignedByName.containsKey(name)) {
                throw new IllegalStateException("All distinct sequences on the array must have unique names; found " + name + " more than once.");
            }
            alignedByName.put(name, aligned);
        }

        // We only remove the old results here, after we have at least one set of blat results.
        if (!oldAlignmentsRemoved) {
            ArrayDesignSequenceAlignmentServiceImpl.log.info("Looking for old results to remove...");
            arrayDesignService.deleteAlignmentData(ad);
            oldAlignmentsRemoved = true;
        }

        int withoutResults = 0;
        int checked = 0;
        for (BioSequence sequence : queries) {
            if (sequence == null) {
                ArrayDesignSequenceAlignmentServiceImpl.log.warn("Null sequence!");
                continue;
            }

            Collection<BlatResult> hits = alignments.get(alignedByName.get(sequence.getName()));
            if (hits == null) {
                withoutResults++;
                continue;
            }

            // Must do this to replace the placeholder instance with the actual query sequence.
            for (BlatResult hit : hits) {
                hit.setQuerySequence(sequence);
            }
            persisted.addAll(this.persistBlatResults(hits));

            checked++;
            if (checked % 2000 == 0) {
                ArrayDesignSequenceAlignmentServiceImpl.log.info("Checked results for " + checked + " queries, " + persisted.size() + " blat results so far.");
            }
        }
        ArrayDesignSequenceAlignmentServiceImpl.log.info(withoutResults + "/" + queries.size() + " sequences had no blat results");
    }

    arrayDesignReportService.generateArrayDesignReport(ad.getId());
    return persisted;
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) HashMap(java.util.HashMap) Taxon(ubic.gemma.model.genome.Taxon) ShellDelegatingBlat(ubic.gemma.core.apps.ShellDelegatingBlat) Collection(java.util.Collection) BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult) HashSet(java.util.HashSet)

Example 25 with BlatResult

use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.

the class MockBlat method blatQuery.

/**
 * Fabricates a single BLAT hit for the given sequence instead of running a real alignment.
 * <p>
 * The hit targets a chromosome named "XXX" backed by a non-persistent test sequence of length 1e7, starts at a
 * random position on it, spans the full query and reports one mismatch and no gaps.
 *
 * @param b the query sequence; its length is read, so it must be set
 * @return a collection containing exactly the one fabricated result
 */
@Override
public Collection<BlatResult> blatQuery(BioSequence b) {
    BioSequence chromosomeSequence = PersistentDummyObjectHelper.getTestNonPersistentBioSequence(taxon);
    chromosomeSequence.setLength(10_000_000L);

    BlatResult hit = BlatResult.Factory.newInstance();
    hit.setTargetChromosome(new Chromosome("XXX", null, chromosomeSequence, taxon));
    assert hit.getTargetChromosome().getSequence() != null;

    // Random start position within the fake chromosome.
    long targetStart = MockBlat.RANDOM.nextInt(chromosomeSequence.getLength().intValue());
    Long queryLength = b.getLength();

    // Target coordinates: the alignment covers the whole query length.
    hit.setQuerySequence(b);
    hit.setTargetStart(targetStart);
    hit.setTargetEnd(targetStart + queryLength);

    // Alignment statistics: everything matches except a single base; no repeats, no gaps.
    hit.setMatches((int) (queryLength - 1));
    hit.setMismatches(1);
    hit.setRepMatches(0);
    hit.setQueryGapCount(0);
    hit.setQueryGapBases(0);
    hit.setQueryStart(0);
    hit.setQueryEnd(queryLength.intValue());
    hit.setTargetGapBases(0);
    hit.setTargetGapCount(0);

    // NOTE(review): this location is populated but never attached to the result or returned - confirm whether
    // a call to setTargetAlignedRegion was intended.
    PhysicalLocation alignedRegion = PhysicalLocation.Factory.newInstance();
    alignedRegion.setChromosome(hit.getTargetChromosome());
    alignedRegion.setNucleotide(targetStart);
    alignedRegion.setNucleotideLength(queryLength.intValue());
    alignedRegion.setStrand("+");

    Collection<BlatResult> hits = new HashSet<>();
    hits.add(hit);
    return hits;
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) Chromosome(ubic.gemma.model.genome.Chromosome) BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult) PhysicalLocation(ubic.gemma.model.genome.PhysicalLocation)

Aggregations

BlatResult (ubic.gemma.model.genome.sequenceAnalysis.BlatResult): 33; BioSequence (ubic.gemma.model.genome.biosequence.BioSequence): 12; HashSet (java.util.HashSet): 10; Collection (java.util.Collection): 9; Taxon (ubic.gemma.model.genome.Taxon): 6; Chromosome (ubic.gemma.model.genome.Chromosome): 5; BlatAssociation (ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation): 5; HashMap (java.util.HashMap): 4; CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence): 4; PhysicalLocation (ubic.gemma.model.genome.PhysicalLocation): 4; IOException (java.io.IOException): 3; InputStream (java.io.InputStream): 3; Blat (ubic.gemma.core.apps.Blat): 3; ShellDelegatingBlat (ubic.gemma.core.apps.ShellDelegatingBlat): 3; ExternalDatabase (ubic.gemma.model.common.description.ExternalDatabase): 3; Test (org.junit.Test): 2; BioSequence2GeneProduct (ubic.gemma.model.association.BioSequence2GeneProduct): 2; ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign): 2; ArrayList (java.util.ArrayList): 1; Date (java.util.Date): 1