Use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.
From the class GeneProductServiceImpl, method remove.
/**
 * Removes the given gene products along with the associations that refer to them.
 * <p>
 * Blat associations and annotation associations found for the products are removed first. Each product is then
 * thawed, every accession for which a bio sequence lookup returns a result is detached from the product, and
 * finally the product itself is removed.
 *
 * @param toRemove the gene products to remove
 */
@Override
@Transactional
public void remove(Collection<GeneProduct> toRemove) {
    Collection<BlatAssociation> blatHits = this.blatAssociationDao.find(toRemove);
    if (!blatHits.isEmpty()) {
        String message = "Removing " + blatHits.size() + " blat associations involving up to " + toRemove.size()
                + " products.";
        AbstractService.log.info(message);
        this.blatAssociationDao.remove(blatHits);
    }

    Collection<AnnotationAssociation> annotationHits = this.annotationAssociationDao.find(toRemove);
    if (!annotationHits.isEmpty()) {
        String message = "Removing " + annotationHits.size() + " annotationAssociations involving up to "
                + toRemove.size() + " products.";
        AbstractService.log.info(message);
        this.annotationAssociationDao.remove(annotationHits);
    }

    for (GeneProduct product : toRemove) {
        GeneProduct thawed = this.thaw(product);

        // Database entries that are still associated with a sequence are only unlinked from the product.
        Collection<DatabaseEntry> sharedWithSequences = new HashSet<>();
        for (DatabaseEntry accession : thawed.getAccessions()) {
            if (this.bioSequenceDao.findByAccession(accession) == null) {
                continue;
            }
            sharedWithSequences.add(accession);
        }
        thawed.getAccessions().removeAll(sharedWithSequences);

        this.geneProductDao.remove(thawed);
    }
}
Use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.
From the class GenomePersister, method removeGeneProducts.
/**
 * Removes the given gene products along with the associations that refer to them.
 * <p>
 * Blat associations and annotation associations found for the products are removed first. For each product,
 * every accession for which a bio sequence lookup returns a result is then detached from the product before the
 * product itself is removed.
 *
 * @param toRemove the gene products to remove
 */
private void removeGeneProducts(Collection<GeneProduct> toRemove) {
    Collection<BlatAssociation> blatHits = this.blatAssociationDao.find(toRemove);
    if (!blatHits.isEmpty()) {
        String message = "Removing " + blatHits.size() + " blat associations involving up to " + toRemove.size()
                + " products.";
        AbstractPersister.log.info(message);
        this.blatAssociationDao.remove(blatHits);
    }

    Collection<AnnotationAssociation> annotationHits = this.annotationAssociationDao.find(toRemove);
    if (!annotationHits.isEmpty()) {
        String message = "Removing " + annotationHits.size() + " annotationAssociations involving up to "
                + toRemove.size() + " products.";
        AbstractPersister.log.info(message);
        this.annotationAssociationDao.remove(annotationHits);
    }

    for (GeneProduct product : toRemove) {
        // Database entries that are still associated with a sequence are only unlinked from the product.
        Collection<DatabaseEntry> sharedWithSequences = new HashSet<>();
        for (DatabaseEntry accession : product.getAccessions()) {
            if (this.bioSequenceDao.findByAccession(accession) == null) {
                continue;
            }
            sharedWithSequences.add(accession);
        }
        product.getAccessions().removeAll(sharedWithSequences);

        this.geneProductDao.remove(product);
    }
}
Use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.
From the class BlatAssociationScorer, method organizeBlatAssociationsByGeneProductAndInitializeScores.
/**
 * Groups the associations by gene product. Along the way each association has its score computed and its
 * specificity set to an initial value of 1.0. Each group is a set, so equal associations are stored only once.
 *
 * @param blatAssociations associations that must all refer to the same query sequence
 * @return map from gene product to the associations that refer to it
 * @throws IllegalArgumentException if the associations refer to more than one distinct bio sequence
 */
private static Map<GeneProduct, Collection<BlatAssociation>> organizeBlatAssociationsByGeneProductAndInitializeScores(Collection<BlatAssociation> blatAssociations) {
    Map<GeneProduct, Collection<BlatAssociation>> byProduct = new HashMap<>();
    // Collects the distinct query sequences seen so far; more than one is an error.
    Collection<BioSequence> querySequences = new HashSet<>();

    for (BlatAssociation hit : blatAssociations) {
        assert hit.getBioSequence() != null;
        BlatAssociationScorer.computeScore(hit);

        querySequences.add(hit.getBioSequence());
        if (querySequences.size() > 1) {
            throw new IllegalArgumentException("Blat associations must all be for the same query sequence");
        }

        assert hit.getGeneProduct() != null;
        GeneProduct product = hit.getGeneProduct();
        Collection<BlatAssociation> group = byProduct.get(product);
        if (group == null) {
            group = new HashSet<>();
            byProduct.put(product, group);
        }
        group.add(hit);

        // Starting value only.
        hit.setSpecificity(1.0);
    }

    return byProduct;
}
Use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.
From the class BlatAssociationScorer, method clusterGenes.
/**
 * Groups genes by the target aligned region of the blat results backing their associations.
 *
 * @param associations associations keyed by gene
 * @return map from each target aligned region to the genes that have an association aligned there
 */
private static Map<PhysicalLocation, Collection<Gene>> clusterGenes(Map<Gene, Collection<BlatAssociation>> associations) {
    Map<PhysicalLocation, Collection<Gene>> genesByLocation = new HashMap<>();

    for (Map.Entry<Gene, Collection<BlatAssociation>> entry : associations.entrySet()) {
        Gene gene = entry.getKey();
        for (BlatAssociation hit : entry.getValue()) {
            PhysicalLocation region = hit.getBlatResult().getTargetAlignedRegion();
            Collection<Gene> members = genesByLocation.get(region);
            if (members == null) {
                members = new HashSet<>();
                genesByLocation.put(region, members);
            }
            members.add(gene);
        }
    }

    if (!BlatAssociationScorer.log.isDebugEnabled()) {
        return genesByLocation;
    }

    // Debug output: report only the locations shared by more than one gene.
    for (Map.Entry<PhysicalLocation, Collection<Gene>> cluster : genesByLocation.entrySet()) {
        Collection<Gene> members = cluster.getValue();
        if (members.size() <= 1) {
            continue;
        }
        BlatAssociationScorer.log.debug("Cluster at " + cluster.getKey() + " with " + members.size() + " members:\n" + StringUtils.join(members.iterator(), "\n"));
    }
    return genesByLocation;
}
Use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.
From the class BlatAssociationScorer, method scoreResults.
/**
 * Scores the BlatAssociations of a single BioSequence, prunes redundant entries and assigns specificities.
 * <p>
 * One sequence can produce many BlatResults against the same gene or gene product. The associations are first
 * organized per gene product and extra hits per gene product are removed from the passed-in collection. The
 * remaining associations are then organized per gene. When they all belong to a single gene, the specificity
 * keeps its initial value. Otherwise genes are clustered by the aligned location of their hits, the best score
 * is taken for each cluster, and every association in a cluster receives the same specificity, computed from that
 * cluster's best score against the best scores of all clusters.
 *
 * @param blatAssociations associations for a single sequence; this collection is modified (pruned and scored)
 *                         and is intended to be used for further analysis afterwards
 * @return the association selected as the overall best while removing extra hits per gene product. This value
 *         is of limited use on its own because it presumes there is a clear winner.
 * @throws IllegalArgumentException if the blatAssociations are from multiple biosequences.
 */
public static BlatAssociation scoreResults(Collection<BlatAssociation> blatAssociations) {
    Map<GeneProduct, Collection<BlatAssociation>> byProduct = BlatAssociationScorer.organizeBlatAssociationsByGeneProductAndInitializeScores(blatAssociations);
    BlatAssociation globalBest = BlatAssociationScorer.removeExtraHitsPerGeneProduct(blatAssociations, byProduct);
    assert blatAssociations.size() > 0;

    Map<Gene, Collection<BlatAssociation>> byGene = BlatAssociationScorer.organizeBlatAssociationsByGene(blatAssociations);
    assert byGene.size() > 0;

    // Several gene products of one and the same gene do not reduce specificity; only truly distinct genes do.
    if (byGene.size() != 1) {
        Map<PhysicalLocation, Collection<Gene>> clusters = BlatAssociationScorer.clusterGenes(byGene);

        // Pass 1: best alignment score found within each cluster of genes.
        Map<PhysicalLocation, Double> bestScoreByCluster = new HashMap<>();
        for (Map.Entry<PhysicalLocation, Collection<Gene>> cluster : clusters.entrySet()) {
            Double top = 0.0;
            for (Gene member : cluster.getValue()) {
                for (BlatAssociation hit : byGene.get(member)) {
                    Double candidate = hit.getScore();
                    if (candidate > top) {
                        top = candidate;
                    }
                }
            }
            bestScoreByCluster.put(cluster.getKey(), top);
        }

        // Pass 2: every association in a cluster gets the specificity derived from that cluster's best score.
        for (Map.Entry<PhysicalLocation, Collection<Gene>> cluster : clusters.entrySet()) {
            Double clusterScore = bestScoreByCluster.get(cluster.getKey());
            for (Gene member : cluster.getValue()) {
                for (BlatAssociation hit : byGene.get(member)) {
                    hit.setSpecificity(BlatAssociationScorer.computeSpecificity(bestScoreByCluster.values(), clusterScore));
                }
            }
        }
    }

    return globalBest;
}
Aggregations