Search in sources :

Example 1 with GeoSeries

use of ubic.gemma.core.loader.expression.geo.model.GeoSeries in project Gemma by PavlidisLab.

the class TwoChannelMissingValuesTest method testMissingValueGSE56.

/**
 * Runs the two-channel missing value computation on the shortened GSE56 fixture and checks the number of
 * vectors returned.
 * <p>
 * GSE56 is corrupt: there is no Channel 1 signal value in the data file.
 *
 * @throws Exception if the fixture cannot be read or parsed, or if persisting or computing fails
 */
@Test
public void testMissingValueGSE56() throws Exception {
    // Remove any experiment already stored under this short name before loading it again.
    ExpressionExperiment old = eeService.findByShortName("GSE56");
    if (old != null) {
        eeService.remove(old);
    }

    GeoFamilyParser parser = new GeoFamilyParser();
    // try-with-resources so the gzip stream is closed even when parsing throws.
    try (InputStream is = new GZIPInputStream(this.getClass().getResourceAsStream("/data/loader/expression/geo/GSE56Short/GSE56_family.soft.gz"))) {
        parser.parse(is);
    }

    GeoSeries series = ((GeoParseResult) parser.getResults().iterator().next()).getSeriesMap().get("GSE56");
    DatasetCombiner datasetCombiner = new DatasetCombiner();
    GeoSampleCorrespondence correspondence = datasetCombiner.findGSECorrespondence(series);
    series.setSampleCorrespondence(correspondence);

    Object result = this.gc.convert(series);
    assertNotNull(result);
    // The converter result is treated as a collection; only its first element is used here.
    ExpressionExperiment expExp = (ExpressionExperiment) ((Collection<?>) result).iterator().next();
    expExp = persisterHelper.persist(expExp, persisterHelper.prepare(expExp));

    Collection<RawExpressionDataVector> calls = tcmv.computeMissingValues(expExp, 2.0, new ArrayList<Double>());
    assertEquals(10, calls.size());
}
Also used : GeoSeries(ubic.gemma.core.loader.expression.geo.model.GeoSeries) GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) GZIPInputStream(java.util.zip.GZIPInputStream) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) Collection(java.util.Collection) Test(org.junit.Test) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest)

Example 2 with GeoSeries

use of ubic.gemma.core.loader.expression.geo.model.GeoSeries in project Gemma by PavlidisLab.

the class TwoChannelMissingValuesTest method testMissingValue.

/**
 * Runs the two-channel missing value computation on the shortened GSE2221 (GenePix) fixture, checks the
 * number of vectors returned, and spot checks two design elements for the sample named
 * "expression array ME-TMZ".
 *
 * @throws Exception if the fixture cannot be read or parsed, or if persisting or computing fails
 */
@Test
public void testMissingValue() throws Exception {
    // Remove any experiment already stored under this short name before loading it again.
    ExpressionExperiment old = eeService.findByShortName("GSE2221");
    if (old != null) {
        eeService.remove(old);
    }

    GeoFamilyParser parser = new GeoFamilyParser();
    // try-with-resources so the gzip stream is closed even when parsing throws.
    try (InputStream is = new GZIPInputStream(this.getClass().getResourceAsStream("/data/loader/expression/geo/shortGenePix/GSE2221_family.soft.gz"))) {
        parser.parse(is);
    }

    GeoSeries series = ((GeoParseResult) parser.getResults().iterator().next()).getSeriesMap().get("GSE2221");
    DatasetCombiner datasetCombiner = new DatasetCombiner();
    GeoSampleCorrespondence correspondence = datasetCombiner.findGSECorrespondence(series);
    series.setSampleCorrespondence(correspondence);

    Object result = this.gc.convert(series);
    assertNotNull(result);
    // The converter result is treated as a collection; only its first element is used here.
    ExpressionExperiment expExp = (ExpressionExperiment) ((Collection<?>) result).iterator().next();
    expExp = persisterHelper.persist(expExp, persisterHelper.prepare(expExp));

    Collection<RawExpressionDataVector> calls = tcmv.computeMissingValues(expExp, 2.0, new ArrayList<Double>());
    assertEquals(500, calls.size());

    // The dimension of the first vector is used to index into every vector's decoded row.
    BioAssayDimension dim = calls.iterator().next().getBioAssayDimension();

    // Spot check the results. For sample ME-TMZ, ID #27 should be 'true' and 26 should be false.
    ByteArrayConverter bac = new ByteArrayConverter();
    boolean foundA = false;
    boolean foundB = false;
    for (DesignElementDataVector vector : calls) {
        String elementName = vector.getDesignElement().getName();
        boolean expectFalse = elementName.equals("26");
        boolean expectTrue = elementName.equals("27");
        if (!expectFalse && !expectTrue) {
            continue;
        }

        boolean[] row = bac.byteArrayToBooleans(vector.getData());
        int i = 0;
        for (BioAssay bas : dim.getBioAssays()) {
            if (bas.getName().equals("expression array ME-TMZ")) {
                if (expectFalse) {
                    assertTrue(!row[i]);
                    foundA = true;
                } else {
                    assertTrue(row[i]);
                    foundB = true;
                }
            }
            i++;
        }
    }
    // Both design elements must have been seen for the target sample, or the spot check proved nothing.
    assertTrue(foundA && foundB);
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) GeoSeries(ubic.gemma.core.loader.expression.geo.model.GeoSeries) GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) GZIPInputStream(java.util.zip.GZIPInputStream) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) Collection(java.util.Collection) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) Test(org.junit.Test) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest)

Example 3 with GeoSeries

use of ubic.gemma.core.loader.expression.geo.model.GeoSeries in project Gemma by PavlidisLab.

the class TwoChannelMissingValuesTest method testMissingValueGSE5091.

/**
 * Runs the two-channel missing value computation on the shortened GSE5091 fixture and checks the number of
 * vectors returned.
 * <p>
 * NOTE(review): the previous comment here was copied from the GSE56 test and described GSE56, not this data
 * set; what is special about GSE5091 is not visible from this method and should be confirmed.
 *
 * @throws Exception if the fixture cannot be read or parsed, or if persisting or computing fails
 */
@Test
public void testMissingValueGSE5091() throws Exception {
    // Remove any experiment already stored under this short name before loading it again.
    ExpressionExperiment old = eeService.findByShortName("GSE5091");
    if (old != null) {
        eeService.remove(old);
    }

    GeoFamilyParser parser = new GeoFamilyParser();
    // try-with-resources so the gzip stream is closed even when parsing throws.
    try (InputStream is = new GZIPInputStream(this.getClass().getResourceAsStream("/data/loader/expression/geo/GSE5091Short/GSE5091_family.soft.gz"))) {
        parser.parse(is);
    }

    GeoSeries series = ((GeoParseResult) parser.getResults().iterator().next()).getSeriesMap().get("GSE5091");
    DatasetCombiner datasetCombiner = new DatasetCombiner();
    GeoSampleCorrespondence correspondence = datasetCombiner.findGSECorrespondence(series);
    series.setSampleCorrespondence(correspondence);

    // This test re-fetches the converter bean before converting, unlike the sibling tests.
    gc = this.getBean(GeoConverter.class);
    Object result = this.gc.convert(series);
    assertNotNull(result);
    // The converter result is treated as a collection; only its first element is used here.
    ExpressionExperiment expExp = (ExpressionExperiment) ((Collection<?>) result).iterator().next();
    expExp = persisterHelper.persist(expExp, persisterHelper.prepare(expExp));

    Collection<RawExpressionDataVector> calls = tcmv.computeMissingValues(expExp, 2.0, new ArrayList<Double>());
    assertEquals(10, calls.size());
}
Also used : GeoSeries(ubic.gemma.core.loader.expression.geo.model.GeoSeries) GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) GZIPInputStream(java.util.zip.GZIPInputStream) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) Collection(java.util.Collection) Test(org.junit.Test) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest)

Example 4 with GeoSeries

use of ubic.gemma.core.loader.expression.geo.model.GeoSeries in project Gemma by PavlidisLab.

the class DatasetCombinerTest method testGDS186.

/*
 * This has just a single data set but results in a "no platform assigned" error.
 *
 * Parses the GDS186 and GSE106 fixtures with one parser, links the first parsed series to the first parsed
 * data set, and checks that the resulting sample correspondence has 11 groups of exactly one sample each.
 */
@Test
public void testGDS186() throws Exception {
    GeoFamilyParser parser = new GeoFamilyParser();
    try (InputStream is = new GZIPInputStream(this.getClass().getResourceAsStream("/data/loader/expression/geo/gse106Short/GDS186.soft.gz"))) {
        parser.parse(is);
    }
    try (InputStream is = new GZIPInputStream(this.getClass().getResourceAsStream("/data/loader/expression/geo/gse106Short/GSE106.soft.gz"))) {
        parser.parse(is);
    }

    GeoParseResult parseResult = ((GeoParseResult) parser.getResults().iterator().next());
    GeoDataset gd = parseResult.getDatasets().values().iterator().next();
    GeoSeries gse = parseResult.getSeries().values().iterator().next();
    // Attach the series to the data set before combining.
    gd.getSeries().add(gse);

    gds = new HashSet<>();
    gds.add(gd);

    DatasetCombiner datasetCombiner = new DatasetCombiner();
    GeoSampleCorrespondence result = datasetCombiner.findGSECorrespondence(gds);
    DatasetCombinerTest.log.debug(result);

    Iterator<Set<String>> it = result.iterator();
    int numBioMaterials = 0;
    while (it.hasNext()) {
        Collection<String> c = it.next();
        // assertEquals reports the actual group size on failure, unlike assertTrue(size == 1).
        assertEquals(1, c.size());
        numBioMaterials++;
    }
    assertEquals(11, numBioMaterials);
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) GeoSeries(ubic.gemma.core.loader.expression.geo.model.GeoSeries) GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) GZIPInputStream(java.util.zip.GZIPInputStream) GeoDataset(ubic.gemma.core.loader.expression.geo.model.GeoDataset) Test(org.junit.Test)

Example 5 with GeoSeries

use of ubic.gemma.core.loader.expression.geo.model.GeoSeries in project Gemma by PavlidisLab.

the class GeoConverterTest method test5C.

/*
 * Bug 3976: make sure we skip non-expression data sets.
 *
 * Generates the GEO series for GSE35721, converts it, and expects the conversion to yield no experiments.
 */
@Test
public final void test5C() {
    // GSE35721
    GeoDomainObjectGenerator generator = new GeoDomainObjectGenerator();
    Object firstGenerated = generator.generate("GSE35721").iterator().next();
    GeoSeries gse35721 = (GeoSeries) firstGenerated;

    @SuppressWarnings("unchecked")
    Collection<ExpressionExperiment> converted = (Collection<ExpressionExperiment>) this.gc.convert(gse35721);

    assertTrue(converted.isEmpty());
}
Also used : GeoSeries(ubic.gemma.core.loader.expression.geo.model.GeoSeries) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) Test(org.junit.Test) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest)

Aggregations

Test (org.junit.Test)38 GeoSeries (ubic.gemma.core.loader.expression.geo.model.GeoSeries)38 GZIPInputStream (java.util.zip.GZIPInputStream)37 InputStream (java.io.InputStream)36 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)33 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)29 Collection (java.util.Collection)6 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)6 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)5 StandardQuantitationType (ubic.gemma.model.common.quantitationtype.StandardQuantitationType)5 HashSet (java.util.HashSet)3 Set (java.util.Set)3 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)3 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)3 GeoDataset (ubic.gemma.core.loader.expression.geo.model.GeoDataset)2 Taxon (ubic.gemma.model.genome.Taxon)2 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)2 Transactional (org.springframework.transaction.annotation.Transactional)1 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)1 ExpressionDataSVD (ubic.gemma.core.analysis.preprocess.svd.ExpressionDataSVD)1