use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.
the class BlatAssociationScorer method removeExtraHitsPerGeneProduct.
/**
 * Reduces the hits to a single one per gene product: for every gene product in the map, only the association with
 * the highest score is kept. When several associations of one gene product share the top score, the one encountered
 * last while iterating wins.
 * <p>
 * Both arguments are modified: each non-empty map value is replaced by a fresh single-element set holding the
 * winner, and {@code blatAssociations} is pruned so that it retains only the winners. Gene products whose collection
 * is empty are left untouched.
 *
 * @param blatAssociations             all associations under consideration; pruned in place.
 * @param geneProduct2BlatAssociations associations grouped by gene product; values are replaced in place.
 * @return the retained association with the highest score over all gene products (the first one seen in case of a
 *         tie), or {@code null} if no retained association has a score greater than zero.
 */
private static BlatAssociation removeExtraHitsPerGeneProduct(Collection<BlatAssociation> blatAssociations, Map<GeneProduct, Collection<BlatAssociation>> geneProduct2BlatAssociations) {
    Collection<BlatAssociation> survivors = new HashSet<>();
    BlatAssociation overallWinner = null;
    double overallTopScore = 0.0;

    for (Map.Entry<GeneProduct, Collection<BlatAssociation>> entry : geneProduct2BlatAssociations.entrySet()) {
        Collection<BlatAssociation> candidates = entry.getValue();
        if (candidates.isEmpty()) {
            continue;
        }

        // Pick the top scorer for this gene product; ">=" means a later candidate displaces an equal earlier one.
        BlatAssociation winner = null;
        double topScore = 0.0;
        for (BlatAssociation candidate : candidates) {
            double candidateScore = candidate.getScore();
            if (winner == null || candidateScore >= topScore) {
                winner = candidate;
                topScore = candidateScore;
            }
        }

        // Everything but the winner is dropped for this gene product.
        Collection<BlatAssociation> onlyWinner = new HashSet<>();
        onlyWinner.add(winner);
        geneProduct2BlatAssociations.put(entry.getKey(), onlyWinner);
        survivors.add(winner);

        // Strict ">" here: the global winner only changes when a strictly better score shows up.
        if (topScore > overallTopScore) {
            overallTopScore = topScore;
            overallWinner = winner;
        }
    }

    blatAssociations.retainAll(survivors);
    return overallWinner;
}
use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.
the class ProbeMapperImpl method filterOnScores.
/**
 * Drops associations whose exon overlap fraction is below the configured minimum.
 * <p>
 * FIXME possibly implement checking the score, not just the exon overlap. As it stands, the scoring has already
 * done its work by removing redundant blat hits, and very weak hits are also already removed. I'm not convinced
 * more filtering on score is needed here.
 *
 * @param blatAssociationsForSequence associations for one sequence.
 * @param config                      supplies the minimum exon overlap fraction; a value of 0 disables filtering.
 * @return the input collection itself when filtering is disabled, otherwise a new set containing only the
 *         associations that were not below the threshold.
 */
private Collection<BlatAssociation> filterOnScores(Collection<BlatAssociation> blatAssociationsForSequence, ProbeMapperConfig config) {
    double threshold = config.getMinimumExonOverlapFraction();
    if (threshold == 0) {
        // Nothing to filter on; hand the caller's collection straight back.
        return blatAssociationsForSequence;
    }

    Collection<BlatAssociation> accepted = new HashSet<>();
    for (BlatAssociation candidate : blatAssociationsForSequence) {
        double overlap = BlatAssociationScorer.computeOverlapFraction(candidate);
        if (overlap < threshold) {
            log.debug("Result failed to meet exon overlap threshold");
        } else {
            accepted.add(candidate);
        }
    }
    return accepted;
}
use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.
the class ProcessedExpressionDataVectorServiceTest method getGeneAssociatedWithEe.
/**
 * Test fixture helper: for every composite sequence on every array design used by the experiment, creates a test
 * gene, gives the composite sequence a freshly persisted biosequence, and persists a blat association linking that
 * biosequence to the gene's first product.
 *
 * @param ee the experiment whose array designs are populated.
 * @return the genes that were created, one per composite sequence.
 */
private Collection<Gene> getGeneAssociatedWithEe(ExpressionExperiment ee) {
    Collection<Gene> associatedGenes = new HashSet<>();

    for (ArrayDesign platform : this.expressionExperimentService.getArrayDesignsUsed(ee)) {
        Taxon mouse = this.getTaxon("mouse");
        ArrayDesign thawedPlatform = this.arrayDesignService.thaw(platform);

        for (CompositeSequence probe : thawedPlatform.getCompositeSequences()) {
            // Gene side of the association.
            Gene gene = this.getTestPersistentGene();
            BlatAssociation association = BlatAssociation.Factory.newInstance();
            association.setGeneProduct(gene.getProducts().iterator().next());

            // Sequence side: a new persisted biosequence with a random name.
            BioSequence sequence = BioSequence.Factory.newInstance();
            sequence.setName(RandomStringUtils.random(10));
            sequence.setTaxon(mouse);
            sequence = (BioSequence) persisterHelper.persist(sequence);
            assertNotNull(sequence);

            // Attach the sequence to the probe and confirm the link is visible after reloading.
            probe.setBiologicalCharacteristic(sequence);
            compositeSequenceService.update(probe);
            CompositeSequence reloadedProbe = compositeSequenceService.load(probe.getId());
            assertNotNull(reloadedProbe.getBiologicalCharacteristic());

            // Tie everything together through a blat result and persist the association.
            BlatResult alignment = BlatResult.Factory.newInstance();
            alignment.setQuerySequence(sequence);
            association.setBlatResult(alignment);
            association.setBioSequence(sequence);
            persisterHelper.persist(association);

            associatedGenes.add(gene);
        }
    }
    return associatedGenes;
}
use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.
the class ProbeMapperTest method testIntronIssues.
/**
 * Looks up associations for a fixed chr1 region and checks that at least one is found and that none of them
 * belongs to the gene with official symbol NBPF10.
 */
public void testIntronIssues() {
    ProbeMapperConfig mapperConfig = new ProbeMapperConfig();
    Collection<BlatAssociation> associations = humangp.findAssociations("chr1", 145517370L, 145518088L, "145517370,145518070", "18,18", null, ThreePrimeDistanceMethod.RIGHT, mapperConfig);

    TestCase.assertFalse(associations.isEmpty());

    for (BlatAssociation association : associations) {
        ProbeMapperTest.log.debug(association);
        String officialSymbol = association.getGeneProduct().getGene().getOfficialSymbol();
        if (officialSymbol.equals("NBPF10")) {
            TestCase.fail("Should not have gotten NBPF10");
        }
    }
}
use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.
the class BlatAssociationDaoImpl method find.
/**
 * Collects the blat associations of all products of a gene. One criteria query is run per gene product; the
 * product is matched on its NCBI GI and/or its name, whichever of the two is not blank.
 *
 * @param gene the gene whose products are looked up.
 * @return the union of the associations found for each product.
 * @throws IllegalArgumentException if the gene has no products.
 */
@Override
public Collection<BlatAssociation> find(Gene gene) {
    if (gene.getProducts().isEmpty()) {
        throw new IllegalArgumentException("Gene has no products");
    }

    Collection<BlatAssociation> found = new HashSet<>();
    for (GeneProduct product : gene.getProducts()) {
        // NOTE(review): presumably rejects products lacking usable identifying fields - confirm in BusinessKey.
        BusinessKey.checkValidKey(product);

        Criteria associationCriteria = super.getSessionFactory().getCurrentSession().createCriteria(BlatAssociation.class);
        Criteria productCriteria = associationCriteria.createCriteria("geneProduct");

        String ncbiGi = product.getNcbiGi();
        if (StringUtils.isNotBlank(ncbiGi)) {
            productCriteria.add(Restrictions.eq("ncbiGi", ncbiGi));
        }

        String productName = product.getName();
        if (StringUtils.isNotBlank(productName)) {
            productCriteria.add(Restrictions.eq("name", productName));
        }

        // noinspection unchecked
        found.addAll(associationCriteria.list());
    }
    return found;
}
Aggregations