use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.
the class BlatAssociationScorerTest method testScoreResults.
/**
 * Scores two alignments of the same gene product, one on a non-canonical
 * chromosome ("6_cox_hap2") and one on a canonical chromosome ("6"), and
 * checks the scores written back to each association plus the specificity
 * of the association returned by the scorer.
 */
@Test
public void testScoreResults() {
    // A single gene product that aligns to two different regions.
    GeneProduct sharedProduct = this.createGeneProduct();
    BlatResult nonCanonicalHit = this.createBlatResult("6_cox_hap2");
    BlatResult canonicalHit = this.createBlatResult("6");

    // Starts with the higher score, but sits on a non-canonical chromosome.
    BlatAssociation nonCanonicalAssociation = BlatAssociation.Factory.newInstance();
    nonCanonicalAssociation.setBlatResult(nonCanonicalHit);
    nonCanonicalAssociation.setGeneProduct(sharedProduct);
    nonCanonicalAssociation.setBioSequence(BioSequence.Factory.newInstance());
    nonCanonicalAssociation.setOverlap(50);
    nonCanonicalAssociation.setScore(50.0);

    // Lower starting score, canonical chromosome.
    BlatAssociation canonicalAssociation = BlatAssociation.Factory.newInstance();
    canonicalAssociation.setBlatResult(canonicalHit);
    canonicalAssociation.setGeneProduct(sharedProduct);
    canonicalAssociation.setBioSequence(BioSequence.Factory.newInstance());
    canonicalAssociation.setOverlap(30);
    canonicalAssociation.setScore(30.0);

    Collection<BlatAssociation> candidates = new ArrayList<>();
    candidates.add(nonCanonicalAssociation);
    candidates.add(canonicalAssociation);

    // NOTE(review): this config is built but never handed to scoreResults below,
    // so it has no visible effect on the call under test.
    ProbeMapperConfig config = new ProbeMapperConfig();
    config.setTrimNonCanonicalChromosomeHits(true);

    BlatAssociation best = BlatAssociationScorer.scoreResults(candidates);

    // Sanity-check the fixture: only chromosome "6" is canonical.
    assertFalse(ChromosomeUtil.isCanonical(nonCanonicalHit.getTargetChromosome()));
    assertTrue(ChromosomeUtil.isCanonical(canonicalHit.getTargetChromosome()));

    // Scores read back from both associations after scoring.
    assertEquals(940.0, nonCanonicalAssociation.getScore(), 0);
    assertEquals(564.0, canonicalAssociation.getScore(), 0);
    assertEquals(1.0, best.getSpecificity(), 0);
    // Which association is returned is deliberately not asserted here.
}
use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.
the class BlatAssociationScorerTest method createBlatResult.
/**
 * Builds a fixture alignment of a 50-base query (49 matches, 1 mismatch,
 * 2 target gaps) against a chromosome of a taxon whose common name is "human".
 *
 * @param name name given to the target chromosome
 * @return the populated, unpersisted BLAT result
 */
private BlatResult createBlatResult(String name) {
    BlatResult fixture = BlatResult.Factory.newInstance();

    // Alignment statistics.
    fixture.setMatches(49);
    fixture.setMismatches(1);
    fixture.setRepMatches(0);
    fixture.setQueryGapCount(0);
    fixture.setTargetGapCount(2);

    // Query sequence of length 50.
    BioSequence query = BioSequence.Factory.newInstance();
    query.setLength(50L);
    fixture.setQuerySequence(query);

    // Target chromosome on the "human" taxon.
    Taxon human = Taxon.Factory.newInstance();
    human.setCommonName("human");
    fixture.setTargetChromosome(new Chromosome(name, human));

    return fixture;
}
use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.
the class BlatResultParser method parseOneLine.
/**
 * Parses one tab-delimited line of BLAT output into a {@link BlatResult}.
 *
 * @param line a single line of BLAT output
 * @return the parsed result, or {@code null} when the line is blank, is a
 *         header line, contains a numeric field that cannot be parsed (the
 *         error is logged), or scores below {@code scoreThreshold} (only
 *         applied when the threshold is greater than zero; such lines are
 *         counted in {@code numSkipped})
 * @throws RuntimeException wrapping an {@link IllegalArgumentException} when
 *         the line does not have exactly {@code NUM_BLAT_FIELDS} fields
 */
@Override
public BlatResult parseOneLine(String line) {
    if (StringUtils.isBlank(line))
        return null;
    try {
        // check if it is a header line.
        if (line.startsWith("psLayout") || line.startsWith("match") || line.startsWith(" ")
                || line.startsWith("-----------------------")) {
            return null;
        }
        String[] f = line.split("\t");
        if (f.length == 0)
            return null;
        if (f.length != BlatResultParser.NUM_BLAT_FIELDS) {
            // Show at most the first 25 characters; Math.min keeps the end index within the line
            // so a short malformed line is reported instead of causing an index error.
            String lineStart = line.substring(0, Math.min(line.length(), 25));
            throw new IllegalArgumentException(
                    f.length + " fields in line, expected " + BlatResultParser.NUM_BLAT_FIELDS + " (starts with "
                            + lineStart + ")");
        }

        BlatResult result = BlatResult.Factory.newInstance();
        result.setQuerySequence(BioSequence.Factory.newInstance());
        Long queryLength = Long.parseLong(f[BlatResultParser.QSIZE_FIELD]);
        result.getQuerySequence().setLength(queryLength);

        // Alignment statistics.
        result.setMatches(Integer.parseInt(f[BlatResultParser.MATCHES_FIELD]));
        result.setMismatches(Integer.parseInt(f[BlatResultParser.MISMATCHES_FIELD]));
        result.setRepMatches(Integer.parseInt(f[BlatResultParser.REPMATCHES_FIELD]));
        result.setNs(Integer.parseInt(f[BlatResultParser.NS_FIELD]));
        result.setQueryGapCount(Integer.parseInt(f[BlatResultParser.QGAPCOUNT_FIELD]));
        result.setQueryGapBases(Integer.parseInt(f[BlatResultParser.QGAPBASES_FIELD]));
        result.setTargetGapBases(Integer.parseInt(f[BlatResultParser.TGAPBASES_FIELD]));
        result.setTargetGapCount(Integer.parseInt(f[BlatResultParser.TGAPCOUNT_FIELD]));

        // Coordinates and block layout.
        result.setStrand(f[BlatResultParser.STRAND_FIELD]);
        result.setQueryStart(Integer.parseInt(f[BlatResultParser.QSTART_FIELD]));
        result.setQueryEnd(Integer.parseInt(f[BlatResultParser.QEND_FIELD]));
        result.setTargetStart(Long.parseLong(f[BlatResultParser.TSTART_FIELD]));
        result.setTargetEnd(Long.parseLong(f[BlatResultParser.TEND_FIELD]));
        result.setBlockCount(Integer.parseInt(f[BlatResultParser.BLOCKCOUNT_FIELD]));
        result.setBlockSizes(f[BlatResultParser.BLOCKSIZES_FIELD]);
        result.setQueryStarts(f[BlatResultParser.QSTARTS_FIELD]);
        result.setTargetStarts(f[BlatResultParser.TSTARTS_FIELD]);

        String queryName = f[BlatResultParser.QNAME_FIELD];
        queryName = BlatResultParser.cleanUpQueryName(queryName);
        assert StringUtils.isNotBlank(queryName);
        result.getQuerySequence().setName(queryName);

        // Reduce target names such as "chr6.fa" to the bare chromosome name "6".
        String chrom = f[BlatResultParser.TNAME_FIELD];
        if (chrom.startsWith("chr")) {
            chrom = chrom.substring("chr".length());
            if (chrom.endsWith(".fa")) {
                // Strip only the trailing suffix, not an earlier ".fa" inside the name.
                chrom = chrom.substring(0, chrom.length() - ".fa".length());
            }
        }

        // Drop low-scoring hits before building the target chromosome objects.
        if (scoreThreshold > 0.0 && result.score() < scoreThreshold) {
            numSkipped++;
            return null;
        }

        result.setTargetChromosome(new Chromosome(chrom, null, BioSequence.Factory.newInstance(), taxon));
        result.getTargetChromosome().getSequence().setName(chrom);
        result.getTargetChromosome().getSequence().setLength(Long.parseLong(f[BlatResultParser.TSIZE_FIELD]));
        result.getTargetChromosome().getSequence().setTaxon(taxon);
        if (searchedDatabase != null) {
            result.setSearchedDatabase(searchedDatabase);
        }
        result.setTargetAlignedRegion(this.makePhysicalLocation(result));
        return result;
    } catch (NumberFormatException e) {
        // Must precede the IllegalArgumentException handler: NumberFormatException is its subclass.
        log.error("Invalid number format", e);
        return null;
    } catch (IllegalArgumentException e) {
        throw new RuntimeException(e);
    }
}
use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.
the class GenomePersister method persistBlatAssociation.
/**
 * Persists a BLAT association along with the objects it refers to: the
 * BLAT result is created if it is still transient, then the gene product
 * and bio sequence are persisted and set back on the association before
 * the association itself is created.
 *
 * @param association the association to persist
 * @return whatever {@code blatAssociationDao.create} returns for the association
 */
private BioSequence2GeneProduct persistBlatAssociation(BlatAssociation association) {
    BlatResult alignment = association.getBlatResult();
    if (this.isTransient(alignment)) {
        // The alignment has not been stored yet; store it first.
        blatResultDao.create(alignment);
    }

    if (AbstractPersister.log.isDebugEnabled()) {
        AbstractPersister.log.debug("Persisting " + association);
    }

    // Swap in the persisted versions of the linked entities.
    association.setGeneProduct(
            this.persistGeneProduct(association.getGeneProduct()));
    association.setBioSequence(
            this.persistBioSequence(association.getBioSequence()));

    return blatAssociationDao.create(association);
}
use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.
the class GoldenPathSequenceAnalysis method getThreePrimeDistances.
/**
 * Uses default mapping settings. Looks up the sequence locations for the
 * identifier and pools the three-prime distance results of every location
 * into one set.
 *
 * @param identifier identifier passed to {@code findSequenceLocations}
 * @param method     the three-prime distance method applied to each location
 * @return the combined bio sequence to gene product associations
 */
public Collection<BioSequence2GeneProduct> getThreePrimeDistances(String identifier, ThreePrimeDistanceMethod method) {
    Collection<BlatResult> alignments = this.findSequenceLocations(identifier);
    Collection<BioSequence2GeneProduct> pooled = new HashSet<>();
    for (BlatResult alignment : alignments) {
        Collection<BioSequence2GeneProduct> forAlignment = this.getThreePrimeDistances(alignment, method);
        pooled.addAll(forAlignment);
    }
    return pooled;
}
Aggregations