Search in sources :

Example 11 with BlatAssociation

use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.

the class GeneProductServiceImpl method remove.

/**
 * Removes the given gene products together with the associations that refer to them.
 * <p>
 * Blat associations and annotation associations found for the products are removed first. Each product is then
 * thawed, every accession for which a bio sequence lookup returns a result is detached from the product, and
 * finally the product itself is removed.
 *
 * @param toRemove the gene products to remove
 */
@Override
@Transactional
public void remove(Collection<GeneProduct> toRemove) {
    Collection<BlatAssociation> blatHits = this.blatAssociationDao.find(toRemove);
    boolean hasBlatHits = !blatHits.isEmpty();
    if (hasBlatHits) {
        String message = "Removing " + blatHits.size() + " blat associations involving up to " + toRemove.size() + " products.";
        AbstractService.log.info(message);
        this.blatAssociationDao.remove(blatHits);
    }

    Collection<AnnotationAssociation> annotationHits = this.annotationAssociationDao.find(toRemove);
    boolean hasAnnotationHits = !annotationHits.isEmpty();
    if (hasAnnotationHits) {
        String message = "Removing " + annotationHits.size() + " annotationAssociations involving up to " + toRemove.size() + " products.";
        AbstractService.log.info(message);
        this.annotationAssociationDao.remove(annotationHits);
    }

    // Accessions that a bio sequence still uses must be detached from the product before the product is removed.
    for (GeneProduct product : toRemove) {
        GeneProduct thawed = this.thaw(product);
        Collection<DatabaseEntry> productAccessions = thawed.getAccessions();
        Collection<DatabaseEntry> sharedWithSequences = new HashSet<>();
        for (DatabaseEntry accession : productAccessions) {
            boolean usedBySequence = this.bioSequenceDao.findByAccession(accession) != null;
            if (usedBySequence) {
                sharedWithSequences.add(accession);
            }
        }
        thawed.getAccessions().removeAll(sharedWithSequences);
        this.geneProductDao.remove(thawed);
    }
}
Also used : GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) AnnotationAssociation(ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) HashSet(java.util.HashSet) Transactional(org.springframework.transaction.annotation.Transactional)

Example 12 with BlatAssociation

use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.

the class GenomePersister method removeGeneProducts.

/**
 * Removes the given gene products, clearing out dependent associations beforehand.
 * <p>
 * Blat associations and annotation associations found for the products are removed first. For each product, the
 * accessions for which a bio sequence lookup returns a result are detached from it, and then the product is
 * removed.
 *
 * @param toRemove the gene products to remove
 */
private void removeGeneProducts(Collection<GeneProduct> toRemove) {
    Collection<BlatAssociation> blatLinks = this.blatAssociationDao.find(toRemove);
    if (!blatLinks.isEmpty()) {
        String note = "Removing " + blatLinks.size() + " blat associations involving up to " + toRemove.size() + " products.";
        AbstractPersister.log.info(note);
        this.blatAssociationDao.remove(blatLinks);
    }

    Collection<AnnotationAssociation> annotationLinks = this.annotationAssociationDao.find(toRemove);
    if (!annotationLinks.isEmpty()) {
        String note = "Removing " + annotationLinks.size() + " annotationAssociations involving up to " + toRemove.size() + " products.";
        AbstractPersister.log.info(note);
        this.annotationAssociationDao.remove(annotationLinks);
    }

    // Detach accessions that a bio sequence still refers to, then remove the product itself.
    for (GeneProduct product : toRemove) {
        Collection<DatabaseEntry> productAccessions = product.getAccessions();
        Collection<DatabaseEntry> stillOnSequences = new HashSet<>();
        for (DatabaseEntry accession : productAccessions) {
            boolean usedBySequence = this.bioSequenceDao.findByAccession(accession) != null;
            if (usedBySequence) {
                stillOnSequences.add(accession);
            }
        }
        product.getAccessions().removeAll(stillOnSequences);
        this.geneProductDao.remove(product);
    }
}
Also used : BioSequence2GeneProduct(ubic.gemma.model.association.BioSequence2GeneProduct) GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) AnnotationAssociation(ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) HashSet(java.util.HashSet)

Example 13 with BlatAssociation

use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.

the class BlatAssociationScorer method organizeBlatAssociationsByGeneProductAndInitializeScores.

/**
 * Scores the given blat associations and groups them by gene product.
 * <p>
 * For every association the score is computed and the specificity is set to an initial value of 1.0. The
 * associations are collected into one set per gene product.
 *
 * @param blatAssociations blat associations, all of which must refer to the same query sequence
 * @return map from gene product to the set of associations that refer to it
 * @throws IllegalArgumentException if the associations refer to more than one query sequence
 */
private static Map<GeneProduct, Collection<BlatAssociation>> organizeBlatAssociationsByGeneProductAndInitializeScores(Collection<BlatAssociation> blatAssociations) {
    Map<GeneProduct, Collection<BlatAssociation>> byProduct = new HashMap<>();
    Collection<BioSequence> querySequences = new HashSet<>();

    for (BlatAssociation association : blatAssociations) {
        assert association.getBioSequence() != null;
        BlatAssociationScorer.computeScore(association);

        // Track distinct query sequences; a second one means the input was mixed.
        querySequences.add(association.getBioSequence());
        if (querySequences.size() > 1) {
            throw new IllegalArgumentException("Blat associations must all be for the same query sequence");
        }

        GeneProduct product = association.getGeneProduct();
        assert product != null;

        // The map never stores null values, so a null lookup means the product has no bucket yet.
        Collection<BlatAssociation> bucket = byProduct.get(product);
        if (bucket == null) {
            bucket = new HashSet<BlatAssociation>();
            byProduct.put(product, bucket);
        }
        bucket.add(association);

        // Starting value only.
        association.setSpecificity(1.0);
    }

    return byProduct;
}
Also used : GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) HashMap(java.util.HashMap) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) Collection(java.util.Collection) BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) HashSet(java.util.HashSet)

Example 14 with BlatAssociation

use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.

the class BlatAssociationScorer method clusterGenes.

/**
 * Groups genes by the target aligned region of their blat results.
 *
 * @param associations blat associations keyed by gene
 * @return map from each physical location of an alignment to the genes that have an association there
 */
private static Map<PhysicalLocation, Collection<Gene>> clusterGenes(Map<Gene, Collection<BlatAssociation>> associations) {
    Map<PhysicalLocation, Collection<Gene>> genesByLocation = new HashMap<>();

    for (Map.Entry<Gene, Collection<BlatAssociation>> entry : associations.entrySet()) {
        Gene gene = entry.getKey();
        for (BlatAssociation hit : entry.getValue()) {
            PhysicalLocation region = hit.getBlatResult().getTargetAlignedRegion();
            // The map never stores null values, so a null lookup means the location is new.
            Collection<Gene> members = genesByLocation.get(region);
            if (members == null) {
                members = new HashSet<Gene>();
                genesByLocation.put(region, members);
            }
            members.add(gene);
        }
    }

    if (!BlatAssociationScorer.log.isDebugEnabled()) {
        return genesByLocation;
    }

    // Debug output: report only the locations shared by more than one gene.
    for (Map.Entry<PhysicalLocation, Collection<Gene>> cluster : genesByLocation.entrySet()) {
        Collection<Gene> members = cluster.getValue();
        if (members.size() <= 1) {
            continue;
        }
        BlatAssociationScorer.log.debug("Cluster at " + cluster.getKey() + " with " + members.size() + " members:\n" + StringUtils.join(members.iterator(), "\n"));
    }
    return genesByLocation;
}
Also used : Gene(ubic.gemma.model.genome.Gene) HashMap(java.util.HashMap) Collection(java.util.Collection) BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) PhysicalLocation(ubic.gemma.model.genome.PhysicalLocation)

Example 15 with BlatAssociation

use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.

the class BlatAssociationScorer method scoreResults.

/**
 * Scores a collection of BlatAssociations that all come from one BioSequence, removes redundant entries, fills in
 * the specificity and returns the best-scoring association.
 * <p>
 * One sequence can produce many BlatResults for the same gene and/or gene product, so the results are first
 * reduced to a single (best) result per gene product. Specificity is then assessed per gene: when the sequence
 * 'hits' more than one gene, the resulting associations get a specificity below 1.
 *
 * @param blatAssociations associations for a single sequence. This collection is pruned of redundant entries and
 *                         its members have their score information filled in; these refined associations are
 *                         what further analysis is intended to use.
 * @return the highest-scoring result (a random one in case of ties). This value is of limited use because it
 * presumes a "clear winner".
 * @throws IllegalArgumentException if the blatAssociations are from multiple biosequences.
 */
public static BlatAssociation scoreResults(Collection<BlatAssociation> blatAssociations) {
    Map<GeneProduct, Collection<BlatAssociation>> byProduct = BlatAssociationScorer.organizeBlatAssociationsByGeneProductAndInitializeScores(blatAssociations);
    BlatAssociation winner = BlatAssociationScorer.removeExtraHitsPerGeneProduct(blatAssociations, byProduct);
    assert blatAssociations.size() > 0;

    Map<Gene, Collection<BlatAssociation>> byGene = BlatAssociationScorer.organizeBlatAssociationsByGene(blatAssociations);
    assert byGene.size() > 0;

    /*
     * By now there should be one blatAssociation per gene product, but they may all belong to the same gene. Only
     * when there really are several genes is a lower specificity assigned.
     */
    if (byGene.size() != 1) {
        BlatAssociationScorer.applyGeneLevelSpecificity(byGene);
    }
    return winner;
}

/**
 * Sets the specificity of every association from scores computed per gene cluster.
 *
 * @param byGene blat associations keyed by gene
 */
private static void applyGeneLevelSpecificity(Map<Gene, Collection<BlatAssociation>> byGene) {
    Map<PhysicalLocation, Collection<Gene>> clusters = BlatAssociationScorer.clusterGenes(byGene);

    // First pass: the best score seen in each gene cluster.
    Map<PhysicalLocation, Double> bestPerCluster = new HashMap<>();
    for (Map.Entry<PhysicalLocation, Collection<Gene>> cluster : clusters.entrySet()) {
        double best = 0.0;
        for (Gene member : cluster.getValue()) {
            for (BlatAssociation hit : byGene.get(member)) {
                double candidate = hit.getScore();
                if (candidate > best) {
                    best = candidate;
                }
            }
        }
        bestPerCluster.put(cluster.getKey(), best);
    }

    // Second pass: every member of a cluster receives the same specificity.
    for (Map.Entry<PhysicalLocation, Collection<Gene>> cluster : clusters.entrySet()) {
        Double clusterScore = bestPerCluster.get(cluster.getKey());
        for (Gene member : cluster.getValue()) {
            for (BlatAssociation hit : byGene.get(member)) {
                hit.setSpecificity(BlatAssociationScorer.computeSpecificity(bestPerCluster.values(), clusterScore));
            }
        }
    }
}
Also used : GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) Gene(ubic.gemma.model.genome.Gene) HashMap(java.util.HashMap) Collection(java.util.Collection) BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) PhysicalLocation(ubic.gemma.model.genome.PhysicalLocation)

Aggregations

BlatAssociation (ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) 24, HashSet (java.util.HashSet) 10, GeneProduct (ubic.gemma.model.genome.gene.GeneProduct) 8, Collection (java.util.Collection) 7, HashMap (java.util.HashMap) 5, BioSequence2GeneProduct (ubic.gemma.model.association.BioSequence2GeneProduct) 5, CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence) 5, BioSequence (ubic.gemma.model.genome.biosequence.BioSequence) 5, BlatResult (ubic.gemma.model.genome.sequenceAnalysis.BlatResult) 5, Gene (ubic.gemma.model.genome.Gene) 4, PhysicalLocation (ubic.gemma.model.genome.PhysicalLocation) 4, Taxon (ubic.gemma.model.genome.Taxon) 3, AnnotationAssociation (ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation) 3, DatabaseEntry (ubic.gemma.model.common.description.DatabaseEntry) 2, ArrayList (java.util.ArrayList) 1, ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue) 1, AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean) 1, Criteria (org.hibernate.Criteria) 1, Test (org.junit.Test) 1, HibernateTemplate (org.springframework.orm.hibernate3.HibernateTemplate) 1