use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.
the class ProbeMapperTest method testProcessBlatResults.
/**
 * Runs the probe mapper over the BLAT result fixture and checks that at least one resulting association points
 * at the gene with official symbol "Filip1l".
 */
public void testProcessBlatResults() {
    // The outcome of this test is sensitive to the exon overlap threshold, so it is pinned to zero.
    ProbeMapperConfig mapperConfig = new ProbeMapperConfig();
    mapperConfig.setMinimumExonOverlapFraction(0);

    ProbeMapper mapper = new ProbeMapperImpl();
    Map<String, Collection<BlatAssociation>> mapped = mapper.processBlatResults(mousegp, blatres, mapperConfig);

    Collection<Collection<BlatAssociation>> groups = mapped.values();
    TestCase.assertTrue("No results", !groups.isEmpty());
    TestCase.assertTrue("No results", !groups.iterator().next().isEmpty());

    // Every association is inspected (no early exit), exactly as many times as there are results.
    boolean sawFilip1l = false;
    for (Collection<BlatAssociation> group : groups) {
        for (BlatAssociation candidate : group) {
            String symbol = candidate.getGeneProduct().getGene().getOfficialSymbol();
            sawFilip1l = "Filip1l".equals(symbol) || sawFilip1l;
        }
    }
    TestCase.assertTrue(sawFilip1l);
}
use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.
the class ArrayDesignMapResultServiceImpl method summarizeMapResults.
/**
 * Builds one {@link CompositeSequenceMapSummary} per composite sequence.
 * <p>
 * For a composite sequence that has a biological characteristic, the summary is filled with the BLAT results
 * found for that sequence plus the gene product and gene of every BLAT association found for it. A composite
 * sequence without a biological characteristic still gets a summary, but nothing further is set on it here.
 *
 * @param compositeSequences the composite sequences to summarize
 * @return the summaries, one per input element (collected in a set)
 */
@Override
public Collection<CompositeSequenceMapSummary> summarizeMapResults(Collection<CompositeSequence> compositeSequences) {
    Collection<CompositeSequenceMapSummary> summaries = new HashSet<>();
    // Only elements that have a sequence are counted towards the progress messages.
    int processed = 0;

    for (CompositeSequence compositeSequence : compositeSequences) {
        CompositeSequenceMapSummary summary = new CompositeSequenceMapSummary(compositeSequence);
        BioSequence sequence = compositeSequence.getBiologicalCharacteristic();

        if (sequence != null) {
            summary.setBlatResults(blatResultService.findByBioSequence(sequence));

            Collection<BlatAssociation> associations = blatAssociationService.find(sequence);
            blatAssociationService.thaw(associations);
            for (BlatAssociation blatAssociation : associations) {
                summary.getGeneProducts().add(blatAssociation.getGeneProduct());
                summary.getGenes().add(blatAssociation.getGeneProduct().getGene());
            }

            // The summary is added only after it has been fully populated.
            summaries.add(summary);

            processed++;
            if (processed % 1000 == 0) {
                ArrayDesignMapResultServiceImpl.log.info("Processed " + processed + " elements...");
            }
        } else {
            summaries.add(summary);
        }
    }

    ArrayDesignMapResultServiceImpl.log.info("Done, processed " + processed + " elements");
    return summaries;
}
use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.
the class ArrayDesignProbeMapperCli method processProbes.
/**
 * Maps each probe named in {@code probeNames} on the given array design and prints the resulting associations.
 * Probe names that cannot be found on the design are reported with a warning and skipped.
 *
 * @param arrayDesign the array design the probes are looked up on; it is thawed (lite) before use
 */
private void processProbes(ArrayDesign arrayDesign) {
    assert this.probeNames != null && this.probeNames.length > 0;

    arrayDesign = arrayDesignService.thawLite(arrayDesign);
    CompositeSequenceService csService = this.getBean(CompositeSequenceService.class);

    for (String name : this.probeNames) {
        CompositeSequence found = csService.findByName(arrayDesign, name);
        if (found != null) {
            CompositeSequence probe = csService.thaw(found);
            Map<String, Collection<BlatAssociation>> mapped = this.arrayDesignProbeMapperService
                    .processCompositeSequence(this.config, taxon, null, probe);

            for (Collection<BlatAssociation> associations : mapped.values()) {
                // Each association is debug-logged individually before the whole group is printed.
                for (BlatAssociation blatAssociation : associations) {
                    if (AbstractCLI.log.isDebugEnabled()) {
                        AbstractCLI.log.debug(blatAssociation);
                    }
                }
                arrayDesignProbeMapperService.printResult(probe, associations);
            }
        } else {
            AbstractCLI.log.warn("No such probe: " + name + " on " + arrayDesign.getShortName());
        }
    }
}
use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.
the class GoldenPathSequenceAnalysis method findAssociations.
/**
 * Finds the genes (or predicted genes) that overlap a physical location and computes, for each one, where the
 * query lies within the gene.
 *
 * @param chromosome The chromosome name (the organism is set by the constructor)
 * @param queryStart The start base of the region to query (the start of the alignment to the genome)
 * @param queryEnd   The end base of the region to query (the end of the alignment to the genome)
 * @param starts     Locations of alignment block starts in target. (comma-delimited from blat)
 * @param sizes      Sizes of alignment blocks (comma-delimited from blat)
 * @param strand     Either + or - indicating the strand to look on, or null to search both strands.
 * @param method     The constant representing the method to use to locate the 3' distance.
 * @param config     configuration; selects which gene tracks (RefGene, known genes) are consulted and supplies
 *                   the minimum exon overlap fraction
 * @return A collection of transient (not from Gemma's database) associations, one per overlapping gene product
 *         that was not rejected. <strong>Returns null if no gene products are found at the location.</strong>
 *         The collection may be empty when gene products were found but all of them were rejected for having
 *         zero overlap. As stated by the original contract, the stored 3' distance is 0 (rather than a negative
 *         value) when the sequence overhangs the found gene.
 * @throws IllegalArgumentException if {@code queryEnd} is less than {@code queryStart}
 */
public Collection<BlatAssociation> findAssociations(String chromosome, Long queryStart, Long queryEnd, String starts, String sizes, String strand, ThreePrimeDistanceMethod method, ProbeMapperConfig config) {
    if (GoldenPath.log.isDebugEnabled()) {
        GoldenPath.log.debug("Seeking gene overlaps with: chrom=" + chromosome + " start=" + queryStart + " end=" + queryEnd + " strand=" + strand);
    }

    if (queryEnd < queryStart) {
        throw new IllegalArgumentException("End must not be less than start");
    }

    // Transient instances only; gathered from whichever tracks the configuration enables.
    Collection<GeneProduct> candidates = new HashSet<>();

    // RefGene goes first so that the correct transcript name etc. is available.
    if (config.isUseRefGene()) {
        candidates.addAll(this.findRefGenesByLocation(chromosome, queryStart, queryEnd, strand));
    }

    // Known genes are added as well, in case all we got was an intron. Currently does not work with rat (rn6).
    if (config.isUseKnownGene()) {
        candidates.addAll(this.findKnownGenesByLocation(chromosome, queryStart, queryEnd, strand));
    }

    if (candidates.isEmpty()) {
        return null;
    }

    Collection<BlatAssociation> associations = new HashSet<>();
    for (GeneProduct candidate : candidates) {
        if (GoldenPath.log.isDebugEnabled()) {
            GoldenPath.log.debug(candidate);
        }

        BlatAssociation located = this.computeLocationInGene(chromosome, queryStart, queryEnd, starts, sizes, candidate, method, config);

        // Only a zero overlap is rejected here. The comparison against the real threshold happens later,
        // because not all of the slots are populated at this point.
        if (config.getMinimumExonOverlapFraction() > 0.0 && located.getOverlap() == 0) {
            GoldenPath.log.debug("Result failed to meet exon overlap threshold (0)");
        } else {
            associations.add(located);
        }
    }
    return associations;
}
use of ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation in project Gemma by PavlidisLab.
the class BlatAssociationScorer method organizeBlatAssociationsByGene.
/**
 * Groups BLAT associations by the gene of their gene product.
 *
 * @param blatAssociations the associations to group
 * @return a map from each gene to the set of associations whose gene product belongs to that gene
 */
private static Map<Gene, Collection<BlatAssociation>> organizeBlatAssociationsByGene(Collection<BlatAssociation> blatAssociations) {
    Map<Gene, Collection<BlatAssociation>> byGene = new HashMap<>();
    for (BlatAssociation association : blatAssociations) {
        Gene key = association.getGeneProduct().getGene();
        // Stored values are never null, so a null lookup means the gene has not been seen yet.
        Collection<BlatAssociation> bucket = byGene.get(key);
        if (bucket == null) {
            bucket = new HashSet<>();
            byGene.put(key, bucket);
        }
        bucket.add(association);
    }
    return byGene;
}
Aggregations