Search in sources :

Example 16 with BlatAssociation

use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.

the class BlatAssociationScorer method removeExtraHitsPerGeneProduct.

/**
 * Reduce the hits so that each gene product is left with exactly one association: its highest-scoring one.
 * <p>
 * For every gene product that has at least one association, the best-scoring association is chosen and the map
 * value for that gene product is replaced by a set holding only that association. When all gene products have been
 * processed, {@code blatAssociations} is pruned in place so that it retains only the chosen associations. Gene
 * products mapped to an empty collection are skipped and their map entry is left as it was.
 *
 * @param blatAssociations             blat associations; modified in place so only the chosen ones remain
 * @param geneProduct2BlatAssociations gene products mapped to their blat associations; modified in place
 * @return the chosen association with the highest score over all gene products, or {@code null} when no chosen
 * association has a score greater than zero (which includes the case where nothing was chosen at all)
 */
private static BlatAssociation removeExtraHitsPerGeneProduct(Collection<BlatAssociation> blatAssociations, Map<GeneProduct, Collection<BlatAssociation>> geneProduct2BlatAssociations) {
    Collection<BlatAssociation> winners = new HashSet<>();
    BlatAssociation overallWinner = null;
    double overallTopScore = 0.0;

    for (Map.Entry<GeneProduct, Collection<BlatAssociation>> entry : geneProduct2BlatAssociations.entrySet()) {
        Collection<BlatAssociation> candidates = entry.getValue();
        if (candidates.isEmpty()) {
            continue;
        }

        // Seed with the first candidate, then scan them all. On ties the later candidate replaces the earlier
        // one, so which one survives depends on the iteration order of the collection.
        BlatAssociation winner = candidates.iterator().next();
        double topScore = winner.getScore();
        for (BlatAssociation candidate : candidates) {
            double candidateScore = candidate.getScore();
            if (candidateScore >= topScore) {
                topScore = candidateScore;
                winner = candidate;
            }
        }

        // Replace this gene product's hits by the single winner.
        Collection<BlatAssociation> onlyWinner = new HashSet<>();
        onlyWinner.add(winner);
        entry.setValue(onlyWinner);
        winners.add(winner);

        // Track the best winner seen so far; it has to beat the initial score of zero to count.
        double winnerScore = winner.getScore();
        if (winnerScore > overallTopScore) {
            overallTopScore = winnerScore;
            overallWinner = winner;
        }
    }

    blatAssociations.retainAll(winners);
    return overallWinner;
}
Also used : GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) HashSet(java.util.HashSet)

Example 17 with BlatAssociation

use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.

the class ProbeMapperImpl method filterOnScores.

/**
 * Drop the associations whose exon overlap fraction is below the configured minimum.
 * <p>
 * FIXME possibly implement checking the score, not just the exon overlap. As it stands, the scoring has already
 * done its work by removing redundant blat hits, and very weak hits are also already removed. I'm not convinced
 * more filtering on score is needed here.
 *
 * @param blatAssociationsForSequence associations for one sequence.
 * @param config                      supplies the minimum exon overlap fraction; a value of 0 turns filtering off
 * @return the input collection itself when filtering is off, otherwise a new set holding the associations that
 * were not below the threshold
 */
private Collection<BlatAssociation> filterOnScores(Collection<BlatAssociation> blatAssociationsForSequence, ProbeMapperConfig config) {
    double threshold = config.getMinimumExonOverlapFraction();
    if (threshold == 0) {
        // Nothing to filter on; hand back the caller's collection unchanged.
        return blatAssociationsForSequence;
    }

    Collection<BlatAssociation> passing = new HashSet<>();
    for (BlatAssociation candidate : blatAssociationsForSequence) {
        boolean belowThreshold = BlatAssociationScorer.computeOverlapFraction(candidate) < threshold;
        if (belowThreshold) {
            log.debug("Result failed to meet exon overlap threshold");
        } else {
            passing.add(candidate);
        }
    }
    return passing;
}
Also used : BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) HashSet(java.util.HashSet)

Example 18 with BlatAssociation

use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorServiceTest method getGeneAssociatedWithEe.

/**
 * Test fixture helper: for every composite sequence on every array design used by the given experiment, obtains a
 * test gene and connects it to that composite sequence through a newly created sequence and blat association.
 *
 * @param ee the experiment whose array designs are processed
 * @return the genes obtained, one per composite sequence visited (collected in a set)
 */
private Collection<Gene> getGeneAssociatedWithEe(ExpressionExperiment ee) {
    Collection<ArrayDesign> ads = this.expressionExperimentService.getArrayDesignsUsed(ee);
    Collection<Gene> genes = new HashSet<>();
    for (ArrayDesign ad : ads) {
        // NOTE(review): the taxon lookup does not depend on the array design; it is repeated once per design.
        Taxon taxon = this.getTaxon("mouse");
        // Re-assign to the thawed instance before reading its composite sequences.
        ad = this.arrayDesignService.thaw(ad);
        for (CompositeSequence cs : ad.getCompositeSequences()) {
            // Helper defined elsewhere; presumably returns an already-persisted gene with products - confirm.
            Gene g = this.getTestPersistentGene();
            BlatAssociation blata = BlatAssociation.Factory.newInstance();
            // Uses the first product of the gene; assumes the gene has at least one product.
            blata.setGeneProduct(g.getProducts().iterator().next());
            BlatResult br = BlatResult.Factory.newInstance();
            // Create and persist a sequence with a random 10-character name for the taxon.
            BioSequence bs = BioSequence.Factory.newInstance();
            bs.setName(RandomStringUtils.random(10));
            bs.setTaxon(taxon);
            bs = (BioSequence) persisterHelper.persist(bs);
            assertNotNull(bs);
            // Attach the sequence to the composite sequence, save, then reload and check the link is present.
            cs.setBiologicalCharacteristic(bs);
            compositeSequenceService.update(cs);
            cs = compositeSequenceService.load(cs.getId());
            assertNotNull(cs.getBiologicalCharacteristic());
            // Wire the blat result and association to the sequence, then persist the association.
            // NOTE(review): br is not persisted explicitly here; presumably handled when blata is persisted - confirm.
            br.setQuerySequence(bs);
            blata.setBlatResult(br);
            blata.setBioSequence(bs);
            persisterHelper.persist(blata);
            genes.add(g);
        }
    }
    return genes;
}
Also used : Gene(ubic.gemma.model.genome.Gene) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) Taxon(ubic.gemma.model.genome.Taxon) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) HashSet(java.util.HashSet) BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult)

Example 19 with BlatAssociation

use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.

the class ProbeMapperTest method testIntronIssues.

/**
 * Looks up associations for a fixed region on chr1 with a default configuration and checks that at least one is
 * returned and that none of them belongs to the gene whose official symbol is NBPF10.
 */
public void testIntronIssues() {
    ProbeMapperConfig config = new ProbeMapperConfig();
    Collection<BlatAssociation> found = humangp.findAssociations("chr1", 145517370L, 145518088L, "145517370,145518070", "18,18", null, ThreePrimeDistanceMethod.RIGHT, config);
    TestCase.assertFalse(found.isEmpty());
    for (BlatAssociation hit : found) {
        ProbeMapperTest.log.debug(hit);
        boolean isUnwantedGene = hit.getGeneProduct().getGene().getOfficialSymbol().equals("NBPF10");
        if (isUnwantedGene) {
            TestCase.fail("Should not have gotten NBPF10");
        }
    }
}
Also used : BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation)

Example 20 with BlatAssociation

use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.

the class BlatAssociationDaoImpl method find.

/**
 * Finds the blat associations whose gene product matches one of the given gene's products. A separate criteria
 * query is run per product, restricted on the product's NCBI GI and/or name when those are not blank, and the
 * results of all queries are merged into one set.
 *
 * @param gene the gene whose products are used for the lookup; must have at least one product
 * @return the merged query results
 * @throws IllegalArgumentException if the gene has no products
 */
@Override
public Collection<BlatAssociation> find(Gene gene) {
    if (gene.getProducts().isEmpty()) {
        throw new IllegalArgumentException("Gene has no products");
    }

    Collection<BlatAssociation> matches = new HashSet<>();
    for (GeneProduct product : gene.getProducts()) {
        BusinessKey.checkValidKey(product);

        Criteria associationCriteria = super.getSessionFactory().getCurrentSession().createCriteria(BlatAssociation.class);
        // Restrictions below are placed on the associated gene product, not on the association itself.
        Criteria productCriteria = associationCriteria.createCriteria("geneProduct");

        if (StringUtils.isNotBlank(product.getNcbiGi())) {
            productCriteria.add(Restrictions.eq("ncbiGi", product.getNcbiGi()));
        }

        if (StringUtils.isNotBlank(product.getName())) {
            productCriteria.add(Restrictions.eq("name", product.getName()));
        }

        // noinspection unchecked
        matches.addAll(associationCriteria.list());
    }
    return matches;
}
Also used : GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) Criteria(org.hibernate.Criteria) BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) HashSet(java.util.HashSet)

Aggregations

BlatAssociation (ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation): 24; HashSet (java.util.HashSet): 10; GeneProduct (ubic.gemma.model.genome.gene.GeneProduct): 8; Collection (java.util.Collection): 7; HashMap (java.util.HashMap): 5; BioSequence2GeneProduct (ubic.gemma.model.association.BioSequence2GeneProduct): 5; CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence): 5; BioSequence (ubic.gemma.model.genome.biosequence.BioSequence): 5; BlatResult (ubic.gemma.model.genome.sequenceAnalysis.BlatResult): 5; Gene (ubic.gemma.model.genome.Gene): 4; PhysicalLocation (ubic.gemma.model.genome.PhysicalLocation): 4; Taxon (ubic.gemma.model.genome.Taxon): 3; AnnotationAssociation (ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation): 3; DatabaseEntry (ubic.gemma.model.common.description.DatabaseEntry): 2; ArrayList (java.util.ArrayList): 1; ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue): 1; AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 1; Criteria (org.hibernate.Criteria): 1; Test (org.junit.Test): 1; HibernateTemplate (org.springframework.orm.hibernate3.HibernateTemplate): 1