Search in sources :

Example 26 with BlatResult

use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.

In class BlatAssociationScorerTest, method testScoreResults:

/**
 * Scores two associations of the same gene product, one whose BLAT result is on chromosome
 * "6_cox_hap2" and one on chromosome "6", then checks the canonical-chromosome classification,
 * the rescored values of both associations and the specificity of the returned association.
 */
@Test
public void testScoreResults() {
    // A single gene product that is aligned to two different regions.
    GeneProduct sharedProduct = this.createGeneProduct();
    BlatResult haplotypeHit = this.createBlatResult("6_cox_hap2");
    BlatResult canonicalHit = this.createBlatResult("6");

    // Starts with the higher score, but sits on a non-canonical chromosome,
    // so it is the one that should be ignored.
    BlatAssociation haplotypeAssociation = BlatAssociation.Factory.newInstance();
    haplotypeAssociation.setGeneProduct(sharedProduct);
    haplotypeAssociation.setBlatResult(haplotypeHit);
    haplotypeAssociation.setScore(50.0);
    haplotypeAssociation.setOverlap(50);
    haplotypeAssociation.setBioSequence(BioSequence.Factory.newInstance());

    BlatAssociation canonicalAssociation = BlatAssociation.Factory.newInstance();
    canonicalAssociation.setGeneProduct(sharedProduct);
    canonicalAssociation.setBlatResult(canonicalHit);
    canonicalAssociation.setScore(30.0);
    canonicalAssociation.setOverlap(30);
    canonicalAssociation.setBioSequence(BioSequence.Factory.newInstance());

    Collection<BlatAssociation> candidates = new ArrayList<>();
    candidates.add(haplotypeAssociation);
    candidates.add(canonicalAssociation);

    // NOTE(review): this configuration is built but never handed to scoreResults below.
    ProbeMapperConfig mapperConfig = new ProbeMapperConfig();
    mapperConfig.setTrimNonCanonicalChromosomeHits(true);

    BlatAssociation best = BlatAssociationScorer.scoreResults(candidates);

    // Sanity-check the fixtures: only chromosome "6" counts as canonical.
    assertFalse(ChromosomeUtil.isCanonical(haplotypeHit.getTargetChromosome()));
    assertTrue(ChromosomeUtil.isCanonical(canonicalHit.getTargetChromosome()));

    // Scoring overwrites the scores that were set above.
    assertEquals(940.0, haplotypeAssociation.getScore(), 0);
    assertEquals(564.0, canonicalAssociation.getScore(), 0);
    assertEquals(1.0, best.getSpecificity(), 0);
    // Identity of the returned association with the canonical one is intentionally not asserted here.
}
Also used : GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) ArrayList(java.util.ArrayList) BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult) Test(org.junit.Test)

Example 27 with BlatResult

use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.

In class BlatAssociationScorerTest, method createBlatResult:

/**
 * Builds a BLAT result fixture with fixed alignment statistics (49 matches, 1 mismatch,
 * 2 target gaps, a query sequence of length 50) on a chromosome of the given name.
 *
 * @param name name given to the target chromosome
 * @return the populated BLAT result
 */
private BlatResult createBlatResult(String name) {
    BlatResult fixture = BlatResult.Factory.newInstance();

    // Alignment statistics.
    fixture.setRepMatches(0);
    fixture.setMatches(49);
    fixture.setQueryGapCount(0);
    fixture.setTargetGapCount(2);
    fixture.setMismatches(1);

    // Query sequence of length 50.
    BioSequence query = BioSequence.Factory.newInstance();
    fixture.setQuerySequence(query);
    fixture.getQuerySequence().setLength(50L);

    // Target chromosome; the taxon's common name is "human".
    Taxon human = Taxon.Factory.newInstance();
    human.setCommonName("human");
    fixture.setTargetChromosome(new Chromosome(name, human));

    return fixture;
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) Taxon(ubic.gemma.model.genome.Taxon) Chromosome(ubic.gemma.model.genome.Chromosome) BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult)

Example 28 with BlatResult

use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.

In class BlatResultParser, method parseOneLine:

/**
 * Parses one tab-delimited line of BLAT output into a {@link BlatResult}.
 *
 * @param line a single line of BLAT output
 * @return the parsed result, or {@code null} when the line is blank, is a header line, contains a
 *         value that cannot be parsed as a number (the failure is logged), or scores below
 *         {@code scoreThreshold} when that threshold is positive
 * @throws RuntimeException wrapping an {@link IllegalArgumentException} raised while parsing, in
 *         particular when the line does not have exactly {@code NUM_BLAT_FIELDS} fields
 */
@Override
public BlatResult parseOneLine(String line) {
    if (StringUtils.isBlank(line)) {
        return null;
    }
    try {
        // check if it is a header line.
        if (line.startsWith("psLayout") || line.startsWith("match") || line.startsWith("    ") || line.startsWith("-----------------------")) {
            return null;
        }
        String[] f = line.split("\t");
        if (f.length == 0) {
            return null;
        }
        if (f.length != BlatResultParser.NUM_BLAT_FIELDS) {
            // Quote at most the first 25 characters; Math.min keeps the end index within the line so
            // that short lines report the field-count problem instead of failing inside substring.
            throw new IllegalArgumentException(f.length + " fields in line, expected " + BlatResultParser.NUM_BLAT_FIELDS + " (starts with " + line.substring(0, Math.min(line.length(), 25)) + ")");
        }
        BlatResult result = BlatResult.Factory.newInstance();
        result.setQuerySequence(BioSequence.Factory.newInstance());
        Long queryLength = Long.parseLong(f[BlatResultParser.QSIZE_FIELD]);
        result.getQuerySequence().setLength(queryLength);
        result.setMatches(Integer.parseInt(f[BlatResultParser.MATCHES_FIELD]));
        result.setMismatches(Integer.parseInt(f[BlatResultParser.MISMATCHES_FIELD]));
        result.setRepMatches(Integer.parseInt(f[BlatResultParser.REPMATCHES_FIELD]));
        result.setNs(Integer.parseInt(f[BlatResultParser.NS_FIELD]));
        result.setQueryGapCount(Integer.parseInt(f[BlatResultParser.QGAPCOUNT_FIELD]));
        result.setQueryGapBases(Integer.parseInt(f[BlatResultParser.QGAPBASES_FIELD]));
        result.setTargetGapBases(Integer.parseInt(f[BlatResultParser.TGAPBASES_FIELD]));
        result.setTargetGapCount(Integer.parseInt(f[BlatResultParser.TGAPCOUNT_FIELD]));
        result.setStrand(f[BlatResultParser.STRAND_FIELD]);
        result.setQueryStart(Integer.parseInt(f[BlatResultParser.QSTART_FIELD]));
        result.setQueryEnd(Integer.parseInt(f[BlatResultParser.QEND_FIELD]));
        result.setTargetStart(Long.parseLong(f[BlatResultParser.TSTART_FIELD]));
        result.setTargetEnd(Long.parseLong(f[BlatResultParser.TEND_FIELD]));
        result.setBlockCount(Integer.parseInt(f[BlatResultParser.BLOCKCOUNT_FIELD]));
        result.setBlockSizes(f[BlatResultParser.BLOCKSIZES_FIELD]);
        result.setQueryStarts(f[BlatResultParser.QSTARTS_FIELD]);
        result.setTargetStarts(f[BlatResultParser.TSTARTS_FIELD]);
        String queryName = f[BlatResultParser.QNAME_FIELD];
        queryName = BlatResultParser.cleanUpQueryName(queryName);
        assert StringUtils.isNotBlank(queryName);
        result.getQuerySequence().setName(queryName);
        // Reduce a target name such as "chr6.fa" to the bare chromosome name "6".
        String chrom = f[BlatResultParser.TNAME_FIELD];
        if (chrom.startsWith("chr")) {
            chrom = chrom.substring("chr".length());
            if (chrom.endsWith(".fa")) {
                // Drop only the trailing suffix, even if ".fa" also occurs earlier in the name.
                chrom = chrom.substring(0, chrom.length() - ".fa".length());
            }
        }
        // Skip low-scoring hits before building the target chromosome.
        if (scoreThreshold > 0.0 && result.score() < scoreThreshold) {
            numSkipped++;
            return null;
        }
        result.setTargetChromosome(new Chromosome(chrom, null, BioSequence.Factory.newInstance(), taxon));
        result.getTargetChromosome().getSequence().setName(chrom);
        result.getTargetChromosome().getSequence().setLength(Long.parseLong(f[BlatResultParser.TSIZE_FIELD]));
        result.getTargetChromosome().getSequence().setTaxon(taxon);
        if (searchedDatabase != null) {
            result.setSearchedDatabase(searchedDatabase);
        }
        result.setTargetAlignedRegion(this.makePhysicalLocation(result));
        return result;
    } catch (NumberFormatException e) {
        // NumberFormatException is an IllegalArgumentException, so it must be caught first;
        // a malformed number is logged and the line is dropped rather than aborting.
        log.error("Invalid number format", e);
        return null;
    } catch (IllegalArgumentException e) {
        throw new RuntimeException(e);
    }
}
Also used : Chromosome(ubic.gemma.model.genome.Chromosome) BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult)

Example 29 with BlatResult

use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.

In class GenomePersister, method persistBlatAssociation:

/**
 * Persists a BLAT association: creates its BLAT result if that is still transient, replaces the
 * association's gene product and bio-sequence with the values returned by persisting them, and
 * finally creates the association itself.
 *
 * @param association the association to persist
 * @return the value returned by {@code blatAssociationDao.create}
 */
private BioSequence2GeneProduct persistBlatAssociation(BlatAssociation association) {
    // The BLAT result is created first, and only when it is still transient.
    BlatResult alignment = association.getBlatResult();
    if (this.isTransient(alignment)) {
        blatResultDao.create(alignment);
    }

    if (AbstractPersister.log.isDebugEnabled()) {
        AbstractPersister.log.debug("Persisting " + association);
    }

    // Swap in the persisted gene product and bio-sequence before creating the association.
    association.setGeneProduct(
            this.persistGeneProduct(association.getGeneProduct()));
    association.setBioSequence(
            this.persistBioSequence(association.getBioSequence()));

    return blatAssociationDao.create(association);
}
Also used : BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult)

Example 30 with BlatResult

use of ubic.gemma.model.genome.sequenceAnalysis.BlatResult in project Gemma by PavlidisLab.

In class GoldenPathSequenceAnalysis, method getThreePrimeDistances:

/**
 * Looks up the sequence locations for an identifier and collects, into a single set, the
 * three-prime-distance results computed for each location. Uses default mapping settings.
 *
 * @param identifier identifier passed to {@code findSequenceLocations}
 * @param method     the method used to compute the three-prime distance
 * @return the combined bio-sequence-to-gene-product associations over all locations
 */
public Collection<BioSequence2GeneProduct> getThreePrimeDistances(String identifier, ThreePrimeDistanceMethod method) {
    Collection<BlatResult> alignments = this.findSequenceLocations(identifier);
    Collection<BioSequence2GeneProduct> combined = new HashSet<>();
    for (BlatResult alignment : alignments) {
        Collection<BioSequence2GeneProduct> perAlignment = this.getThreePrimeDistances(alignment, method);
        combined.addAll(perAlignment);
    }
    return combined;
}
Also used : BioSequence2GeneProduct(ubic.gemma.model.association.BioSequence2GeneProduct) BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult) HashSet(java.util.HashSet)

Aggregations

BlatResult (ubic.gemma.model.genome.sequenceAnalysis.BlatResult)33 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)12 HashSet (java.util.HashSet)10 Collection (java.util.Collection)9 Taxon (ubic.gemma.model.genome.Taxon)6 Chromosome (ubic.gemma.model.genome.Chromosome)5 BlatAssociation (ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation)5 HashMap (java.util.HashMap)4 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)4 PhysicalLocation (ubic.gemma.model.genome.PhysicalLocation)4 IOException (java.io.IOException)3 InputStream (java.io.InputStream)3 Blat (ubic.gemma.core.apps.Blat)3 ShellDelegatingBlat (ubic.gemma.core.apps.ShellDelegatingBlat)3 ExternalDatabase (ubic.gemma.model.common.description.ExternalDatabase)3 Test (org.junit.Test)2 BioSequence2GeneProduct (ubic.gemma.model.association.BioSequence2GeneProduct)2 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)2 ArrayList (java.util.ArrayList)1 Date (java.util.Date)1