Search in sources :

Example 36 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class GeoConverterTest method testImageClones.

/*
 * We should no longer use IMAGE:XXXXX as the sequence name: after converting platform GPL226, any sequence whose
 * name still starts with "IMAGE" must not have a sequence database entry.
 */
@Test
public void testImageClones() throws Exception {
    GeoFamilyParser parser = new GeoFamilyParser();
    // try-with-resources so the gzip stream is released even if parsing throws.
    try (InputStream is = new GZIPInputStream(this.getClass().getResourceAsStream("/data/loader/expression/geo/GPL226_family.soft.gz"))) {
        parser.parse(is);
    }
    GeoPlatform platform = ((GeoParseResult) parser.getResults().iterator().next()).getPlatformMap().get("GPL226");
    Object result = this.gc.convert(platform);
    ArrayDesign ad = (ArrayDesign) result;
    assertNotNull(ad);
    for (CompositeSequence cs : ad.getCompositeSequences()) {
        BioSequence bs = cs.getBiologicalCharacteristic();
        // The sequence name is put in the failure message to identify the offending element.
        assertTrue("Got: " + bs.getName(), !bs.getName().startsWith("IMAGE") || bs.getSequenceDatabaseEntry() == null);
    }
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) InputStream(java.io.InputStream) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) GeoPlatform(ubic.gemma.core.loader.expression.geo.model.GeoPlatform) Test(org.junit.Test) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest)

Example 37 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class GeoConverterTest method testWithImages.

/*
 * Has image clones. Converts platform GPL890 and passes as soon as one composite sequence is found whose
 * biological characteristic carries a non-null sequence; fails if none does.
 */
@Test
public final void testWithImages() throws Exception {
    GeoFamilyParser familyParser = new GeoFamilyParser();
    familyParser.setProcessPlatformsOnly(true);
    try (InputStream gzipped = new GZIPInputStream(this.getClass().getResourceAsStream("/data/loader/expression/geo/GPL890_family.soft.gz"))) {
        familyParser.parse(gzipped);
    }
    GeoParseResult parsed = (GeoParseResult) familyParser.getResults().iterator().next();
    GeoPlatform geoPlatform = parsed.getPlatformMap().get("GPL890");
    Object converted = this.gc.convert(geoPlatform);
    ArrayDesign arrayDesign = (ArrayDesign) converted;

    // Stop scanning at the first element that has sequence data.
    boolean sequenceFound = false;
    for (CompositeSequence element : arrayDesign.getCompositeSequences()) {
        BioSequence characteristic = element.getBiologicalCharacteristic();
        if (characteristic == null || characteristic.getSequence() == null) {
            continue;
        }
        sequenceFound = true;
        break;
    }
    if (!sequenceFound) {
        fail("No sequences!");
    }
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) InputStream(java.io.InputStream) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) GeoPlatform(ubic.gemma.core.loader.expression.geo.model.GeoPlatform) Test(org.junit.Test) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest)

Example 38 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class GeoConverterTest method testMultipleTaxaIdentifiedBYAbbreviationsOnArrayWithOrganismColumn.

/*
 * Tests that an array design can have multiple taxa stored against it, and that abbreviations used as probe
 * names are mapped to the correct taxa when the abbreviation is stored in the DB.
 */
@Test
public void testMultipleTaxaIdentifiedBYAbbreviationsOnArrayWithOrganismColumn() throws Exception {
    Taxon rainbowTrout = taxonService.findByAbbreviation("omyk");
    Taxon whiteFish = taxonService.findByAbbreviation("cclu");
    Taxon rainbowSmelt = taxonService.findByAbbreviation("omor");
    Taxon atlanticSalm = taxonService.findByAbbreviation("ssal");
    // Check every lookup up front: a missing taxon would otherwise only surface as an opaque
    // contains() failure at the end of the test.
    assertNotNull("No taxon found for abbreviation omyk", rainbowTrout);
    assertNotNull("No taxon found for abbreviation cclu", whiteFish);
    assertNotNull("No taxon found for abbreviation omor", rainbowSmelt);
    assertNotNull("No taxon found for abbreviation ssal", atlanticSalm);
    // prototype bean.
    gc = this.getBean(GeoConverter.class);
    GeoFamilyParser parser = new GeoFamilyParser();
    // parse only the platform
    parser.setProcessPlatformsOnly(true);
    // try-with-resources so the gzip stream is released even if parsing throws.
    try (InputStream is = new GZIPInputStream(this.getClass().getResourceAsStream("/data/loader/expression/geo/GPL2899_family.soft.gz"))) {
        parser.parse(is);
    }
    GeoPlatform platform = ((GeoParseResult) parser.getResults().iterator().next()).getPlatformMap().get("GPL2899");
    Object result = gc.convert(platform);
    ArrayDesign ad = (ArrayDesign) result;
    assertNotNull(ad);
    // Collect the distinct taxa of all elements that have a biological characteristic.
    Set<Taxon> taxa = new HashSet<>();
    for (CompositeSequence cs : ad.getCompositeSequences()) {
        BioSequence bs = cs.getBiologicalCharacteristic();
        if (bs != null) {
            assertNotNull(bs.getTaxon());
            taxa.add(bs.getTaxon());
        }
    }
    // original file has five taxa, test file just kept four.
    assertEquals(4, taxa.size());
    assertTrue(taxa.contains(atlanticSalm));
    assertTrue(taxa.contains(rainbowTrout));
    assertTrue(taxa.contains(whiteFish));
    assertTrue(taxa.contains(rainbowSmelt));
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) InputStream(java.io.InputStream) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) Taxon(ubic.gemma.model.genome.Taxon) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) GeoPlatform(ubic.gemma.core.loader.expression.geo.model.GeoPlatform) Test(org.junit.Test) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest)

Example 39 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class CommonQueries method getCs2GeneMap.

/**
 * Finds the probes (composite sequences) that map to any of the given genes.
 *
 * @param genes   genes to look up; bound to the query's {@code :genes} parameter list
 * @param session Hibernate session used to create and run the query
 * @return map of probes to input genes they map to. Other genes those probes might detect are not included.
 *         Empty when {@code genes} is empty.
 */
public static Map<CompositeSequence, Collection<Gene>> getCs2GeneMap(Collection<Gene> genes, Session session) {
    Map<CompositeSequence, Collection<Gene>> cs2gene = new HashMap<>();
    if (genes.isEmpty()) {
        // An empty IN list cannot match anything (and some databases reject "in ()"), so skip the query.
        return cs2gene;
    }
    StopWatch timer = new StopWatch();
    timer.start();
    final String csQueryString = "select distinct cs, gene from Gene as gene" + " inner join gene.products gp, BioSequence2GeneProduct ba, CompositeSequence cs " + " where ba.bioSequence=cs.biologicalCharacteristic and ba.geneProduct = gp" + " and gene in (:genes)  ";
    org.hibernate.Query queryObject = session.createQuery(csQueryString);
    queryObject.setCacheable(true);
    queryObject.setParameterList("genes", genes);
    queryObject.setReadOnly(true);
    queryObject.setFlushMode(FlushMode.MANUAL);
    ScrollableResults results = queryObject.scroll(ScrollMode.FORWARD_ONLY);
    try {
        CommonQueries.addGenes(cs2gene, results);
    } finally {
        // Always release the scroll cursor, even if addGenes throws.
        results.close();
    }
    // Only log queries that took noticeably long.
    if (timer.getTime() > 200) {
        CommonQueries.log.info("Get cs2gene for " + genes.size() + " :" + timer.getTime() + "ms");
    }
    return cs2gene;
}
Also used : org.hibernate(org.hibernate) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 40 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class CommonQueries method getCs2GeneMap.

/**
 * Finds the probes (composite sequences) on the given array designs that map to any of the given genes.
 *
 * @param genes        genes to look up; bound to the query's {@code :genes} parameter list
 * @param arrayDesigns array designs the probes must belong to; bound to the {@code :ads} parameter list
 * @param session      Hibernate session used to create and run the query
 * @return map of probes to the input genes they map to. Empty when {@code genes} or {@code arrayDesigns} is
 *         empty.
 */
public static Map<CompositeSequence, Collection<Gene>> getCs2GeneMap(Collection<Gene> genes, Collection<ArrayDesign> arrayDesigns, Session session) {
    Map<CompositeSequence, Collection<Gene>> cs2gene = new HashMap<>();
    if (genes.isEmpty() || arrayDesigns.isEmpty()) {
        // An empty IN list cannot match anything (and some databases reject "in ()"), so skip the query.
        return cs2gene;
    }
    StopWatch timer = new StopWatch();
    timer.start();
    final String csQueryString = "select distinct cs, gene from Gene as gene" + " inner join gene.products gp, BioSequence2GeneProduct ba, CompositeSequence cs " + " where ba.bioSequence=cs.biologicalCharacteristic and ba.geneProduct = gp" + " and gene in (:genes) and cs.arrayDesign in (:ads) ";
    Query queryObject = session.createQuery(csQueryString);
    queryObject.setCacheable(true);
    queryObject.setParameterList("genes", genes);
    queryObject.setParameterList("ads", arrayDesigns);
    queryObject.setReadOnly(true);
    queryObject.setFlushMode(FlushMode.MANUAL);
    ScrollableResults results = queryObject.scroll(ScrollMode.FORWARD_ONLY);
    try {
        CommonQueries.addGenes(cs2gene, results);
    } finally {
        // Always release the scroll cursor, even if addGenes throws.
        results.close();
    }
    // Only log queries that took noticeably long.
    if (timer.getTime() > 200) {
        CommonQueries.log.info("Get cs2gene for " + genes.size() + " :" + timer.getTime() + "ms");
    }
    return cs2gene;
}
Also used : CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) StopWatch(org.apache.commons.lang3.time.StopWatch)

Aggregations

CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence): 206; ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign): 43; BioSequence (ubic.gemma.model.genome.biosequence.BioSequence): 40; Gene (ubic.gemma.model.genome.Gene): 32; Test (org.junit.Test): 30; BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial): 19; ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix): 18; BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay): 18; DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector): 18; RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector): 18; StopWatch (org.apache.commons.lang3.time.StopWatch): 17; HashSet (java.util.HashSet): 15; BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension): 15; CompositeSequenceValueObject (ubic.gemma.model.expression.designElement.CompositeSequenceValueObject): 15; ArrayList (java.util.ArrayList): 14; QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType): 14; BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest): 13; Taxon (ubic.gemma.model.genome.Taxon): 12; Collection (java.util.Collection): 11; ByteArrayConverter (ubic.basecode.io.ByteArrayConverter): 11