Search in sources :

Example 11 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

the class GeoDatasetServiceTest method testFetchAndLoadMultiChipPerSeriesShort.

/**
 * Loads series GSE674 from the local "shortTest" files and checks the experimental design and the
 * preferred data matrix built from the raw vectors. The test is skipped (returns early) when the
 * data set is already present in the system.
 */
@Test
public void testFetchAndLoadMultiChipPerSeriesShort() throws Exception {
    geoService.setGeoDomainObjectGenerator(
            new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("shortTest")));

    /*
     * HG-U133A. GDS473 is for the other chip (B). Series is GSE674. see
     * http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gds&term=GSE674[Accession]&cmd=search
     */
    ExpressionExperiment experiment;
    try {
        Collection<?> loaded = geoService.fetchAndLoad("GSE674", false, true, false);
        experiment = (ExpressionExperiment) loaded.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // Nothing to verify if a previous run left the data behind.
        log.info("Skipping test, data already exists in db");
        return;
    }
    assertNotNull(experiment);
    experiment = eeService.thaw(experiment);

    /*
     * Test for bug 468 (merging of subsets across GDS's): the first factor must have 2 values,
     * otherwise get 4.
     */
    ExperimentalFactor firstFactor = experiment.getExperimentalDesign().getExperimentalFactors().iterator().next();
    assertEquals(2, firstFactor.getFactorValues().size());

    // Build the preferred-data matrix from the thawed raw vectors and check its dimensions.
    Collection<RawExpressionDataVector> rawVectors = experiment.getRawExpressionDataVectors();
    rawExpressionDataVectorService.thaw(rawVectors);
    ExpressionDataMatrix<Double> preferred = new ExpressionDataMatrixBuilder(rawVectors).getPreferredData();
    assertNotNull(preferred);
    assertEquals(31, preferred.rows());
    assertEquals(15, preferred.columns());

    // Spot checks: GSM10363 = D1-U133B, GSM10380 = C7-U133A
    this.testMatrixValue(experiment, preferred, "200000_s_at", "GSM10363", 5722.0);
    this.testMatrixValue(experiment, preferred, "1007_s_at", "GSM10380", 1272.0);
}
Also used : ExpressionDataMatrixBuilder(ubic.gemma.core.analysis.preprocess.ExpressionDataMatrixBuilder) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 12 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

the class GeoDatasetServiceTest method testMatrixValue.

/**
 * Asserts that the matrix holds the expected value for a given probe in a given sample.
 * <p>
 * The probe and the sample are looked up by scanning the experiment's raw expression data vectors:
 * the probe by the name of the vector's design element, the sample by the accession of the
 * bioassays in the vector's bioassay dimension. If several candidates match, the last one
 * encountered is used.
 *
 * @param exp           experiment whose raw vectors are searched
 * @param matrix        matrix to read the value from
 * @param probeToTest   name of the design element (probe) to look up
 * @param sampleToTest  accession of the bioassay (sample) to look up
 * @param expectedValue value expected in the matrix, compared with a tolerance of 0.00001
 */
private void testMatrixValue(ExpressionExperiment exp, ExpressionDataMatrix<Double> matrix, String probeToTest, String sampleToTest, double expectedValue) {
    CompositeSequence soughtDesignElement = null;
    BioAssay soughtBioAssay = null;
    Collection<RawExpressionDataVector> vectors = exp.getRawExpressionDataVectors();
    for (DesignElementDataVector vector : vectors) {
        CompositeSequence de = vector.getDesignElement();
        if (de.getName().equals(probeToTest)) {
            soughtDesignElement = de;
        }
        BioAssayDimension bad = vector.getBioAssayDimension();
        for (BioAssay ba : bad.getBioAssays()) {
            if (ba.getAccession().getAccession().equals(sampleToTest)) {
                soughtBioAssay = ba;
            }
        }
    }
    // Report which lookup failed; previously a missing probe was reported as a missing sample.
    if (soughtDesignElement == null) {
        fail("didn't find probe " + probeToTest + " among the raw vectors");
    }
    if (soughtBioAssay == null) {
        fail("didn't find values for " + sampleToTest);
    }
    Double actualValue = matrix.get(soughtDesignElement, soughtBioAssay);
    assertNotNull("No value for " + soughtBioAssay, actualValue);
    assertEquals(expectedValue, actualValue, 0.00001);
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 13 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

the class TwoChannelMissingValuesTest method testMissingValueGSE56.

/**
 * GSE56 is corrupt: there is no Channel 1 signal value in the data file.
 * <p>
 * Parses the shortened GSE56 family file, converts and persists the resulting experiment, then
 * computes missing values and expects 10 vectors back.
 */
@Test
public void testMissingValueGSE56() throws Exception {
    // Remove any copy left over from an earlier run so the experiment can be persisted again.
    ExpressionExperiment old = eeService.findByShortName("GSE56");
    if (old != null)
        eeService.remove(old);
    GeoFamilyParser parser = new GeoFamilyParser();
    // try-with-resources: the gzip stream used to be left open after parsing.
    try (InputStream is = new GZIPInputStream(this.getClass().getResourceAsStream("/data/loader/expression/geo/GSE56Short/GSE56_family.soft.gz"))) {
        parser.parse(is);
    }
    GeoSeries series = ((GeoParseResult) parser.getResults().iterator().next()).getSeriesMap().get("GSE56");
    DatasetCombiner datasetCombiner = new DatasetCombiner();
    GeoSampleCorrespondence correspondence = datasetCombiner.findGSECorrespondence(series);
    series.setSampleCorrespondence(correspondence);
    Object result = this.gc.convert(series);
    assertNotNull(result);
    ExpressionExperiment expExp = (ExpressionExperiment) ((Collection<?>) result).iterator().next();
    expExp = persisterHelper.persist(expExp, persisterHelper.prepare(expExp));
    Collection<RawExpressionDataVector> calls = tcmv.computeMissingValues(expExp, 2.0, new ArrayList<Double>());
    assertEquals(10, calls.size());
}
Also used : GeoSeries(ubic.gemma.core.loader.expression.geo.model.GeoSeries) GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) GZIPInputStream(java.util.zip.GZIPInputStream) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) Collection(java.util.Collection) Test(org.junit.Test) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest)

Example 14 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

the class TwoChannelMissingValuesTest method testMissingValue.

/**
 * Parses the shortened GSE2221 family file, converts and persists the resulting experiment,
 * computes missing values and spot-checks the calls for probes "26" and "27" in the bioassay
 * named "expression array ME-TMZ".
 */
@Test
public void testMissingValue() throws Exception {
    // Remove any copy left over from an earlier run so the experiment can be persisted again.
    ExpressionExperiment old = eeService.findByShortName("GSE2221");
    if (old != null)
        eeService.remove(old);
    GeoFamilyParser parser = new GeoFamilyParser();
    // try-with-resources: the gzip stream used to be left open after parsing.
    try (InputStream is = new GZIPInputStream(this.getClass().getResourceAsStream("/data/loader/expression/geo/shortGenePix/GSE2221_family.soft.gz"))) {
        parser.parse(is);
    }
    GeoSeries series = ((GeoParseResult) parser.getResults().iterator().next()).getSeriesMap().get("GSE2221");
    DatasetCombiner datasetCombiner = new DatasetCombiner();
    GeoSampleCorrespondence correspondence = datasetCombiner.findGSECorrespondence(series);
    series.setSampleCorrespondence(correspondence);
    Object result = this.gc.convert(series);
    assertNotNull(result);
    ExpressionExperiment expExp = (ExpressionExperiment) ((Collection<?>) result).iterator().next();
    expExp = persisterHelper.persist(expExp, persisterHelper.prepare(expExp));
    Collection<RawExpressionDataVector> calls = tcmv.computeMissingValues(expExp, 2.0, new ArrayList<Double>());
    assertEquals(500, calls.size());
    // The dimension of the first vector is used for every vector below.
    // NOTE(review): assumes all returned vectors share one BioAssayDimension - confirm.
    BioAssayDimension dim = calls.iterator().next().getBioAssayDimension();
    // Spot check the results. For sample ME-TMZ, ID #27 should be 'true' and 26 should be false.
    ByteArrayConverter bac = new ByteArrayConverter();
    boolean foundA = false;
    boolean foundB = false;
    for (DesignElementDataVector vector : calls) {
        if (vector.getDesignElement().getName().equals("26")) {
            byte[] dat = vector.getData();
            boolean[] row = bac.byteArrayToBooleans(dat);
            // i tracks the position of the bioassay within the dimension, used as the index into row.
            int i = 0;
            for (BioAssay bas : dim.getBioAssays()) {
                if (bas.getName().equals("expression array ME-TMZ")) {
                    assertTrue(!row[i]);
                    foundA = true;
                }
                i++;
            }
        }
        if (vector.getDesignElement().getName().equals("27")) {
            byte[] dat = vector.getData();
            boolean[] row = bac.byteArrayToBooleans(dat);
            int i = 0;
            for (BioAssay bas : dim.getBioAssays()) {
                if (bas.getName().equals("expression array ME-TMZ")) {
                    assertTrue(row[i]);
                    foundB = true;
                }
                i++;
            }
        }
    }
    // Both spot checks must actually have run; otherwise the assertions above were vacuous.
    assertTrue(foundA && foundB);
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) GeoSeries(ubic.gemma.core.loader.expression.geo.model.GeoSeries) GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) GZIPInputStream(java.util.zip.GZIPInputStream) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) Collection(java.util.Collection) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) Test(org.junit.Test) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest)

Example 15 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

the class TwoChannelMissingValuesTest method testMissingValueGSE5091.

/**
 * Parses the shortened GSE5091 family file, converts and persists the resulting experiment, then
 * computes missing values and expects 10 vectors back.
 * <p>
 * (The previous comment here described GSE56 and did not apply to this data set.)
 */
@Test
public void testMissingValueGSE5091() throws Exception {
    // Remove any copy left over from an earlier run so the experiment can be persisted again.
    ExpressionExperiment old = eeService.findByShortName("GSE5091");
    if (old != null)
        eeService.remove(old);
    GeoFamilyParser parser = new GeoFamilyParser();
    // try-with-resources: the gzip stream used to be left open after parsing.
    try (InputStream is = new GZIPInputStream(this.getClass().getResourceAsStream("/data/loader/expression/geo/GSE5091Short/GSE5091_family.soft.gz"))) {
        parser.parse(is);
    }
    GeoSeries series = ((GeoParseResult) parser.getResults().iterator().next()).getSeriesMap().get("GSE5091");
    DatasetCombiner datasetCombiner = new DatasetCombiner();
    GeoSampleCorrespondence correspondence = datasetCombiner.findGSECorrespondence(series);
    series.setSampleCorrespondence(correspondence);
    // This test fetches a GeoConverter bean and stores it in the gc field before converting.
    gc = this.getBean(GeoConverter.class);
    Object result = this.gc.convert(series);
    assertNotNull(result);
    ExpressionExperiment expExp = (ExpressionExperiment) ((Collection<?>) result).iterator().next();
    expExp = persisterHelper.persist(expExp, persisterHelper.prepare(expExp));
    Collection<RawExpressionDataVector> calls = tcmv.computeMissingValues(expExp, 2.0, new ArrayList<Double>());
    assertEquals(10, calls.size());
}
Also used : GeoSeries(ubic.gemma.core.loader.expression.geo.model.GeoSeries) GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) GZIPInputStream(java.util.zip.GZIPInputStream) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) Collection(java.util.Collection) Test(org.junit.Test) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest)

Aggregations

RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)53 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)18 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)16 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)16 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)16 Test (org.junit.Test)15 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)13 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)12 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)9 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)7 InputStream (java.io.InputStream)6 Collection (java.util.Collection)6 HashSet (java.util.HashSet)6 GZIPInputStream (java.util.zip.GZIPInputStream)6 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)6 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)6 GeoSeries (ubic.gemma.core.loader.expression.geo.model.GeoSeries)6 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)6 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)5 Transactional (org.springframework.transaction.annotation.Transactional)4