Search in sources :

Example 76 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class MockBlat method blatQuery.

/**
 * Builds one synthetic {@link BlatResult} for the given query sequence and returns it in a
 * one-element collection.
 * <p>
 * The hit is placed on a chromosome named "XXX" whose backing sequence length is set to
 * 10,000,000. The target start is drawn from {@code MockBlat.RANDOM}, bounded by the length read
 * back from that sequence. The hit is described as gap-free with a single mismatch.
 *
 * @param b the query sequence; its length is unboxed here, so it must not be {@code null}
 * @return a mutable collection holding exactly one result
 */
@Override
public Collection<BlatResult> blatQuery(BioSequence b) {
    final Collection<BlatResult> hits = new HashSet<>();

    // Stand-in chromosome sequence for the taxon held by this instance.
    final BioSequence chromosomeSequence = PersistentDummyObjectHelper.getTestNonPersistentBioSequence(taxon);
    chromosomeSequence.setLength(10_000_000L);

    final BlatResult hit = BlatResult.Factory.newInstance();
    final Chromosome fakeChromosome = new Chromosome("XXX", null, chromosomeSequence, taxon);
    hit.setTargetChromosome(fakeChromosome);
    assert hit.getTargetChromosome().getSequence() != null;

    // Random start position bounded by the chromosome sequence length.
    final long targetStart = MockBlat.RANDOM.nextInt(chromosomeSequence.getLength().intValue());
    final Long queryLength = b.getLength();

    // Query side: the whole query aligns, starting at offset 0.
    hit.setQuerySequence(b);
    hit.setTargetStart(targetStart);
    hit.setTargetEnd(targetStart + queryLength);

    // All bases match except one; no repeats and no gaps on either side.
    hit.setMatches((int) (queryLength - 1));
    hit.setMismatches(1);
    hit.setRepMatches(0);
    hit.setQueryGapCount(0);
    hit.setQueryGapBases(0);
    hit.setQueryStart(0);
    hit.setQueryEnd(queryLength.intValue());
    hit.setTargetGapBases(0);
    hit.setTargetGapCount(0);

    // NOTE(review): this location is fully populated but never attached to the result or returned
    // from this method - confirm whether a setter call on the result is missing.
    final PhysicalLocation alignedRegion = PhysicalLocation.Factory.newInstance();
    alignedRegion.setChromosome(hit.getTargetChromosome());
    alignedRegion.setNucleotide(targetStart);
    alignedRegion.setNucleotideLength(queryLength.intValue());
    alignedRegion.setStrand("+");

    hits.add(hit);
    return hits;
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) Chromosome(ubic.gemma.model.genome.Chromosome) BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult) PhysicalLocation(ubic.gemma.model.genome.PhysicalLocation)

Example 77 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class GeoConverterTest method testArrayTaxonDifferentToSampleTaxon.

/*
 * GSE2388 is an example where the array taxon and the sample taxon do not match. This test checks that the
 * biomaterial taxon and the array design taxa are both set correctly by the converter.
 */
@SuppressWarnings("unchecked")
@Test
@Transactional
public void testArrayTaxonDifferentToSampleTaxon() throws Exception {
    Taxon rainbowTrout = taxonService.findByAbbreviation("omyk");
    assertNotNull(rainbowTrout);
    Taxon atlanticSalm = taxonService.findByAbbreviation("ssal");
    assertNotNull(atlanticSalm);

    GeoFamilyParser parser = new GeoFamilyParser();
    // try-with-resources so the gzip stream is closed even if parsing throws.
    try (InputStream is = new GZIPInputStream(this.getClass().getResourceAsStream("/data/loader/expression/geo/GSE2388_family.soft.gz"))) {
        parser.parse(is);
    }

    // Series and platform are both read from the first parse result.
    GeoParseResult parseResult = (GeoParseResult) parser.getResults().iterator().next();
    GeoSeries series = parseResult.getSeriesMap().get("GSE2388");
    GeoPlatform platform = parseResult.getPlatformMap().get("GPL966");

    DatasetCombiner datasetCombiner = new DatasetCombiner();
    GeoSampleCorrespondence correspondence = datasetCombiner.findGSECorrespondence(series);
    series.setSampleCorrespondence(correspondence);

    // assert that the biomaterials have been set as one taxon
    Object seriesResult = gc.convert(series);
    assertNotNull(seriesResult);
    Collection<ExpressionExperiment> ees = (Collection<ExpressionExperiment>) seriesResult;
    ExpressionExperiment exper = ees.iterator().next();
    Collection<BioAssay> bioassays = exper.getBioAssays();
    BioMaterial material = bioassays.iterator().next().getSampleUsed();
    Taxon taxon = material.getSourceTaxon();
    assertEquals("Oncorhynchus kisutch", taxon.getScientificName());

    // assert that the platform is another taxon
    Object resultPlatForm = gc.convert(platform);
    ArrayDesign ad = (ArrayDesign) resultPlatForm;
    assertNotNull(ad);
    Set<Taxon> taxa = new HashSet<>();
    for (CompositeSequence cs : ad.getCompositeSequences()) {
        BioSequence bs = cs.getBiologicalCharacteristic();
        if (bs != null) {
            assertNotNull(bs.getTaxon());
            log.info(bs.getTaxon());
            taxa.add(bs.getTaxon());
        }
    }
    // Probes without a sequence are skipped above, so only the two sequence taxa are expected here.
    assertEquals(2, taxa.size());
    assertTrue(taxa.contains(rainbowTrout));
    assertTrue(taxa.contains(atlanticSalm));
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) GeoSeries(ubic.gemma.core.loader.expression.geo.model.GeoSeries) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) Taxon(ubic.gemma.model.genome.Taxon) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) GeoPlatform(ubic.gemma.core.loader.expression.geo.model.GeoPlatform) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) Test(org.junit.Test) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest) Transactional(org.springframework.transaction.annotation.Transactional)

Example 78 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class GeoConverterTest method testSingleTaxonOnArrayWithNoOrganismColumn.

/*
 * Ensure that if a platform has a single taxon (and no organism column), the taxon is still set correctly:
 * every probe sequence on the converted array design must report the same taxon.
 */
@Test
public void testSingleTaxonOnArrayWithNoOrganismColumn() throws Exception {
    GeoFamilyParser parser = new GeoFamilyParser();
    parser.setProcessPlatformsOnly(true);
    // try-with-resources so the gzip stream is closed even if parsing throws.
    try (InputStream is = new GZIPInputStream(this.getClass().getResourceAsStream("/data/loader/expression/geo/GPL226_family.soft.gz"))) {
        parser.parse(is);
    }
    GeoPlatform platform = ((GeoParseResult) parser.getResults().iterator().next()).getPlatformMap().get("GPL226");
    Object result = this.gc.convert(platform);
    ArrayDesign ad = (ArrayDesign) result;
    assertNotNull(ad);

    // Collect the distinct taxa across all probes that carry a sequence.
    Set<Taxon> listPossibleTaxonValues = new HashSet<>();
    for (CompositeSequence cs : ad.getCompositeSequences()) {
        BioSequence bs = cs.getBiologicalCharacteristic();
        if (bs != null) {
            listPossibleTaxonValues.add(bs.getTaxon());
        }
    }
    assertEquals(1, listPossibleTaxonValues.size());
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) InputStream(java.io.InputStream) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) Taxon(ubic.gemma.model.genome.Taxon) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) GeoPlatform(ubic.gemma.core.loader.expression.geo.model.GeoPlatform) Test(org.junit.Test) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest)

Example 79 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class FastaParserTest method testParseInputStream.

/**
 * Runs {@link FastaParser} over the input referenced by {@code f} (declared outside this method)
 * and verifies that exactly 172 sequences are produced. Each parsed sequence is written to the
 * debug log.
 *
 * @throws Exception if parsing fails
 */
public void testParseInputStream() throws Exception {
    final FastaParser parser = new FastaParser();
    parser.parse(f);

    final Collection<BioSequence> parsed = parser.getResults();
    TestCase.assertNotNull(parsed);
    TestCase.assertEquals(172, parsed.size());

    for (final BioSequence sequence : parsed) {
        final String line = "NAME=" + sequence.getName() + " DESC=" + sequence.getDescription() + " SEQ=" + sequence.getSequence();
        FastaParserTest.log.debug(line);
    }
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence)

Example 80 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class FastaParserTest method testParsecodelink.

/**
 * Parses the bundled CodeLink test sequence resource with {@link FastaParser} and verifies that
 * exactly 22 sequences are produced. Each parsed sequence is written to the debug log.
 *
 * @throws Exception if the resource cannot be read or parsing fails
 */
public void testParsecodelink() throws Exception {
    try (InputStream codelinkStream = FastaParserTest.class.getResourceAsStream("/data/loader/genome/codelink.testsequence.txt")) {
        final FastaParser parser = new FastaParser();
        parser.parse(codelinkStream);

        final Collection<BioSequence> parsed = parser.getResults();
        TestCase.assertNotNull(parsed);
        TestCase.assertEquals(22, parsed.size());

        for (final BioSequence sequence : parsed) {
            final String line = "NAME=" + sequence.getName() + " DESC=" + sequence.getDescription() + " SEQ=" + sequence.getSequence();
            FastaParserTest.log.debug(line);
        }
    }
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) InputStream(java.io.InputStream)

Aggregations

BioSequence (ubic.gemma.model.genome.biosequence.BioSequence) 105, CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence) 40, ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign) 24, Test (org.junit.Test) 18, HashSet (java.util.HashSet) 17, Taxon (ubic.gemma.model.genome.Taxon) 15, BlatResult (ubic.gemma.model.genome.sequenceAnalysis.BlatResult) 12, InputStream (java.io.InputStream) 11, Collection (java.util.Collection) 11, HashMap (java.util.HashMap) 10, BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest) 10, GZIPInputStream (java.util.zip.GZIPInputStream) 7, Gene (ubic.gemma.model.genome.Gene) 7, GeoPlatform (ubic.gemma.core.loader.expression.geo.model.GeoPlatform) 6, DatabaseEntry (ubic.gemma.model.common.description.DatabaseEntry) 6, StopWatch (org.apache.commons.lang3.time.StopWatch) 5, GeneProduct (ubic.gemma.model.genome.gene.GeneProduct) 5, BioSequenceValueObject (ubic.gemma.model.genome.sequenceAnalysis.BioSequenceValueObject) 5, BlatAssociation (ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) 5, AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) 4